[ [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie - 27518206_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"629309709\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CLARISSE DEROLLEZ\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ang\\u00e9lique RINGARD\", \"beneficiary_dob\": null, \"invoice_date\": \"10-06-2023\", \"security_number\": null, \"invoice_issuer\": \"CLARISE DEROLEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"10-06-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1498644-copie-de-l-acte-de-divorce-d-antonio-580x0-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/quittance-de-loyer-exemple_1_scale_1.0.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/quittance-de-loyer_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RF MO ODO 2018 2 8_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB Epitech-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/lettre-justificatif-domicile-parents-L-w2s_7r_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB DU SIE SCEAUX-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/mobilenew2_1_scale_1.0.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/mobilenew3_1_scale_1.0.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/mobilenew_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Attestation de droits RO-25878827-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/facture anonyme_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie - 27581557_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie - 27581557_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Psychologue\", \"rpps_number\": \"10000895531\", \"doctor_name\": \"ALBERTO BLANQUER-LAGUARTA\", \"total_billed\": 78.5, \"bill_paid\": true, \"amount_paid\": 78.5, \"mandatory_coverage\": 42.5, \"complementary_coverage\": null, \"client_part\": 36.0, \"insured_name\": \"BOUZAT MARIE-CLAUDE\", \"insured_dob\": \"07-09-1960\", \"beneficiary_name\": \"BOUZAT MARIE-CLAUDE\", \"beneficiary_dob\": \"07-09-1960\", \"invoice_date\": \"23-06-2023\", \"security_number\": \"2600912208266\", \"invoice_issuer\": \"ALBERTO BLANQUER-LAGUARTA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CNP\", \"date_of_service\": \"23-06-2023\", \"mandatory_coverage\": 42.5, \"amount\": 78.5}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Psychiatrie\", \"rpps_number\": \"10000895531\", \"doctor_name\": \"ALBERTO BLANQUER-LAGUARTA\", \"total_billed\": 78.5, \"bill_paid\": true, \"amount_paid\": 78.5, \"mandatory_coverage\": 42.5, \"complementary_coverage\": null, \"client_part\": 36.0, \"insured_name\": \"BOUZAT MARIE-CLAUDE\", \"insured_dob\": \"07-09-1960\", \"beneficiary_name\": \"BOUZAT MARIE-CLAUDE\", \"beneficiary_dob\": \"07-09-1960\", \"invoice_date\": \"30-06-2023\", \"security_number\": \"2600912208266\", \"invoice_issuer\": \"ALBERTO BLANQUER-LAGUARTA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CNP\", \"date_of_service\": \"30-06-2023\", \"mandatory_coverage\": 42.5, \"amount\": 78.5}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie - 27641339_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"789327913\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-Lucie GORGA\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Audrey LE MAUFF\", \"beneficiary_dob\": null, \"invoice_date\": \"07-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Marie-Lucie GORGA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"07-07-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED A-B-2481575_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"8900001472\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-Laure Caussanel\", \"total_billed\": 160, \"bill_paid\": true, \"amount_paid\": 160, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BUYS Audrey\", \"beneficiary_dob\": \"01-03-1975\", \"invoice_date\": \"30-05-2023\", \"security_number\": null, \"invoice_issuer\": \"Marie-Laure Caussanel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"28-01-2023\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"s\\u00e9ance(s) de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"11-02-2023\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED A-B22-677661_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"029302213\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LEF\\u00c8VRE H\\u00e9l\\u00e8ne\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BEAURAIN Gabriel\", \"beneficiary_dob\": \"30-06-2011\", \"invoice_date\": \"12-05-2023\", \"security_number\": null, \"invoice_issuer\": \"LEF\\u00c8VRE H\\u00e9l\\u00e8ne El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-05-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED A-B22-677662_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"029302213\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LEF\\u00c8VRE H\\u00e9l\\u00e8ne\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BEAURAIN Th\\u00e9o\", \"beneficiary_dob\": \"10-12-2013\", \"invoice_date\": \"26-05-2023\", \"security_number\": null, \"invoice_issuer\": \"LEF\\u00c8VRE H\\u00e9l\\u00e8ne El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-05-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED K-M1-1228144_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"169300134\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nathalie LABARCHEDE\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"C\\u00e9line LE GUELLANFF\", \"beneficiary_dob\": null, \"invoice_date\": \"13-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Nathalie LABARCHEDE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"13-06-2023\", \"mandatory_coverage\": null, \"amount\": 120}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R-2391367_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"769315011\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TABET Laarem\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PERRET Andr\\u00e9as\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2023\", \"security_number\": null, \"invoice_issuer\": \"TABET Laarem\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"17-04-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R-2391368_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"769315011\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TABET Laarem\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PERRET Andr\\u00e9as\", \"beneficiary_dob\": null, \"invoice_date\": \"26-05-2023\", \"security_number\": null, \"invoice_issuer\": \"TABET Laarem\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-05-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R-2391503_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R-2391503_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"659301832\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maud GREFFIER\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gabriel Piques\", \"beneficiary_dob\": null, \"invoice_date\": \"11-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Maud GREFFIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"11-07-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R1-941543_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"679300335\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HEINTZ Yannick\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROCHE Elisabeth\", \"beneficiary_dob\": null, \"invoice_date\": \"05-06-2023\", \"security_number\": null, \"invoice_issuer\": \"HEINTZ Yannick\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations du 23 mai et 5 juin 2023\", \"quantity\": 2, \"date_of_service\": \"23-06-2023\", \"mandatory_coverage\": null, \"amount\": 120}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R1-946126_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"519302764\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Stephen Dehoul\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vincent Oberlin\", \"beneficiary_dob\": null, \"invoice_date\": \"26-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Stephen Dehoul - Psychologue Reims\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-06-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R1-948960_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED N-R1-948960_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"609312582\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sophie NORMAND\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mickael PAYET\", \"beneficiary_dob\": null, \"invoice_date\": \"07-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Sophie NORMAND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie du 07/07/2023\", \"quantity\": 1, \"date_of_service\": \"07-07-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED S-Z22-1003798_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"689309136\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"KLIMPEL GEOFFROY\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NATHANAEL SCHULLER\", \"beneficiary_dob\": null, \"invoice_date\": \"24-06-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET LIBERAL KLIMPEL GEOFFROY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien psychologique\", \"quantity\": 1, \"date_of_service\": \"24-06-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED S-Z22-1003799_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"689309136\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"KLIMPEL GEOFFROY\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NATHANAEL SCHULLER\", \"beneficiary_dob\": null, \"invoice_date\": \"24-07-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET LIBERAL KLIMPEL GEOFFROY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien psychologique\", \"quantity\": 1, \"date_of_service\": \"24-07-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED S-Z22-1003800_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"689309136\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"KLIMPEL GEOFFROY\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NATHANAEL SCHULLER\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET LIBERAL KLIMPEL GEOFFROY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien psychologique\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture psychologie-GED S-Z22-996335_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759350168\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandra Ferrara\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Enora LE BARS TERRAZA\", \"beneficiary_dob\": null, \"invoice_date\": \"02-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Sandra Ferrara\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"30-06-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27103224_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"2B9300348\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FRANCHI C\\u00e9line\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CLEMENTE NEVIA\", \"beneficiary_dob\": \"02-07-1977\", \"invoice_date\": \"02-01-2023\", \"security_number\": null, \"invoice_issuer\": \"CENTRE PSYCHOTHERAPIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"02-01-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27126402_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-Laure CHOUKAIR\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laura Mycyszyn\", \"beneficiary_dob\": null, \"invoice_date\": \"13-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Marie-Laure CHOUKAIR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"13-01-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27146817_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"749309787\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline SERALTA\", \"total_billed\": 60.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Gonzalez Aur\\u00e9lie\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2023\", \"security_number\": \"287029520305257\", \"invoice_issuer\": \"Caroline SERALTA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"27-01-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27148581_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"669302515\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CATHERINE BANET-BOUAZIZ\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Kais Zeggou-Cotte\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2023\", \"security_number\": null, \"invoice_issuer\": \"CATHERINE BANET-BOUAZIZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"08-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27151190_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"929317485\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cohen Nathalie\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nila Ramos\", \"beneficiary_dob\": null, \"invoice_date\": \"24-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Cohen Nathalie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie individuelle\", \"quantity\": 1, \"date_of_service\": \"02-01-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"S\\u00e9ance de psychologie individuelle\", \"quantity\": 1, \"date_of_service\": \"24-01-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27162326_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"261048854\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VAUGAN VALERIE\", \"total_billed\": 105.0, \"bill_paid\": true, \"amount_paid\": 62.5, \"mandatory_coverage\": 42.5, \"complementary_coverage\": null, \"client_part\": 62.5, \"remaining_payment\": 0, \"insured_name\": \"SAMIRI BAHIJA\", \"insured_dob\": \"10-09-1973\", \"beneficiary_name\": \"SAMIRI BAHIJA\", \"beneficiary_dob\": \"10-09-1973\", \"invoice_date\": \"06-02-2023\", \"security_number\": \"2730999350481\", \"invoice_issuer\": \"Madame VAUGAN VALERIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CNP 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 42.5, \"amount\": 105}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27163243_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"599333044\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne Flahaut\", \"total_billed\": 70.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"D\\u00e9borah Roose\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Anne Flahaut El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27195007_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"919313718\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire LANGELIER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emma Thomi\\u00e8re\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Claire LANGELIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"31-01-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27195869_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"649309903\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Becerra Ang\\u00e9lique\", \"total_billed\": 147.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Campano Antoine\", \"beneficiary_dob\": null, \"invoice_date\": \"17-01-2023\", \"security_number\": null, \"invoice_issuer\": \"La Clinique E-sant\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie \\u00e0 distance\", \"quantity\": 1, \"date_of_service\": \"17-01-2023\", \"mandatory_coverage\": null, \"amount\": 147.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27195883_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"939314530\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benard Eliana\", \"total_billed\": 147.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Campano Antoine\", \"beneficiary_dob\": null, \"invoice_date\": \"17-02-2023\", \"security_number\": null, \"invoice_issuer\": \"La Clinique E-sant\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie \\u00e0 distance\", \"quantity\": 1, \"date_of_service\": \"17-02-2023\", \"mandatory_coverage\": null, \"amount\": 147.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27196883_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"069317675\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Prune LEROY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nicolas CARDONA\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2023\", \"security_number\": null, \"invoice_issuer\": \"Prune LEROY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"07-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27199895_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"599334174\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cassandra Carrier\", \"total_billed\": 240, \"bill_paid\": true, \"amount_paid\": 240, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 240, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Beaumont Julien\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Cassandra Carrier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"05-09-2022\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"12-09-2022\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"19-09-2022\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"26-09-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27199897_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"599334174\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cassandra Carrier\", \"total_billed\": 240, \"bill_paid\": true, \"amount_paid\": 240, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 240, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Beaumont Emilie\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Cassandra Carrier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"30-01-2023\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"06-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"13-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Couple - Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"20-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27200192_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dominique Brun\", \"total_billed\": 70, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Kevin Batilliot\", \"beneficiary_dob\": null, \"invoice_date\": \"22-02-2023\", \"security_number\": null, \"invoice_issuer\": \"M. Dominique BRUN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"22-02-2023\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27200660_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"La\\u00ebtitia LEVRERE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Johan LLOVERAS\", \"beneficiary_dob\": null, \"invoice_date\": \"22-02-2023\", \"security_number\": null, \"invoice_issuer\": \"La\\u00ebtitia LEVRERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"22-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27201372_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"789319712\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emma DECOURCELLE\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"David PEREIRA\", \"beneficiary_dob\": null, \"invoice_date\": \"22-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Emma DECOURCELLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi th\\u00e9rapeutique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27206664_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"319332011\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"No\\u00eblle SANCHEZ\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Elisa LAFONTAINE\", \"beneficiary_dob\": null, \"invoice_date\": \"18-02-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Neuropsychologie du Lauragais Enfants et Adolescents\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation 18/02/2023 Cpam suivi et du 03/02/2023 CPAM suivi\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27213931_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"569308026\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DAVID LE PAVIC\", \"total_billed\": 110, \"bill_paid\": true, \"amount_paid\": 110, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Umh Amile\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2023\", \"security_number\": null, \"invoice_issuer\": \"DAVID LE PAVIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de Psychologie\", \"quantity\": 1, \"date_of_service\": \"23-01-2023\", \"mandatory_coverage\": null, \"amount\": 55}, {\"description\": \"S\\u00e9ances de Psychologie\", \"quantity\": 1, \"date_of_service\": \"06-01-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27226776_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"446932477\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORENCE CONAN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nolann Tiger\", \"beneficiary_dob\": null, \"invoice_date\": \"03-03-2023\", \"security_number\": null, \"invoice_issuer\": \"FLORENCE CONAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-03-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27226996_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Virginie GAUTIER\", \"total_billed\": 110.0, \"bill_paid\": true, \"amount_paid\": 110.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lucile GUILLEMOT\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2023\", \"security_number\": null, \"invoice_issuer\": \"L'atelier du libre je\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi psychoth\\u00e9rapeutique de l'enfant Lucile GUILLEMOT\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 110.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27231841_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759343957\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Am\\u00e9lie Fromm\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Julian GENDRE\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"06-03-2023\", \"security_number\": \"175078000108034\", \"invoice_issuer\": \"Am\\u00e9lie Fromm\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"06-03-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27231891_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"699334447\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sara HOIBIAN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOUTTEMANT Chlo\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Mme Sara HOIBIAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"27-01-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27231986_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"409303146\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Clio Dupouy\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LABERTIT Nad\\u00e8ge\", \"insured_dob\": null, \"beneficiary_name\": \"LABERTIT Nad\\u00e8ge\", \"beneficiary_dob\": null, \"invoice_date\": \"22-02-2023\", \"security_number\": \"278104008802361\", \"invoice_issuer\": \"Clio Dupouy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie individuelle\", \"quantity\": 1, \"date_of_service\": \"22-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27232257_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"339315822\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Corinne Mascarro-Goacolou\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"GACOIN Laurent\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"03-03-2023\", \"security_number\": \"282 10 33 039 032 60\", \"invoice_issuer\": \"Corinne Mascarro-Goacolou\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"03-02-2023\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"s\\u00e9ance(s) de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"03-03-2023\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27232480_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"139329031\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUISSANI Laura\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bocquet V\\u00e9rane\", \"beneficiary_dob\": \"04-03-2002\", \"invoice_date\": \"02-03-2023\", \"security_number\": null, \"invoice_issuer\": \"GUISSANI Laura\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"02-03-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27234926_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"599300084\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-Line KALINOWSKI\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DESTRIEZ Pauline\", \"beneficiary_dob\": null, \"invoice_date\": \"07-03-2023\", \"security_number\": null, \"invoice_issuer\": \"Marie-Line KALINOWSKI - Psychologue\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance th\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"07-03-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27235184_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 505, \"bill_paid\": true, \"amount_paid\": 505, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE SAINT Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"02-03-2023\", \"security_number\": \"298067864642555\", \"invoice_issuer\": \"CogCharonne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Bilan neuropsychologique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 361.76}, {\"description\": \"Evaluation eff. intellectuelle\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.12}, {\"description\": \"Evaluation def. cog\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.12}, {\"description\": \"Frais de r\\u00e9servation\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27235260_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"650001753\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M.-L. BONDON\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VERIN Cristiano\", \"beneficiary_dob\": null, \"invoice_date\": \"09-02-2023\", \"security_number\": null, \"invoice_issuer\": \"M.-L. BONDON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"09-02-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27235261_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"650001753\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M.-L. BONDON\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VERIN Cristiano\", \"beneficiary_dob\": null, \"invoice_date\": \"07-03-2023\", \"security_number\": null, \"invoice_issuer\": \"M.-L. BONDON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"07-03-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27236936_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"269303319\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ang\\u00e8le FLORES-GENRE\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Eithan CHENOUL\", \"beneficiary_dob\": null, \"invoice_date\": \"27-02-2023\", \"security_number\": \"2720169388024\", \"invoice_issuer\": \"Ang\\u00e8le FLORES-GENRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION\", \"quantity\": 1, \"date_of_service\": \"27-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27238995_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"959321498\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SELLIER-MEYNARD Val\\u00e9rie\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cosson J\\u00e9r\\u00e9mie\", \"beneficiary_dob\": \"13-08-1984\", \"invoice_date\": \"07-03-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet PSYC.AVENIR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de suivi individuel - 07/03/2023\", \"quantity\": 1, \"date_of_service\": \"07-03-2023\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27396735_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"879302495\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"REBEIX CELINE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sandra CHALAMEL-LACOURT\", \"beneficiary_dob\": null, \"invoice_date\": \"16-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Madame REBEIX CELINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Psychologue\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27396737_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"879302495\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"REBEIX CELINE\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHALAMEL-LACOURT Sandra\", \"beneficiary_dob\": null, \"invoice_date\": \"13-03-2023\", \"security_number\": null, \"invoice_issuer\": \"Madame REBEIX CELINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Psychologue\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27399788_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"949309629\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audrey Schocpfer\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Desm\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"26-04-2023\", \"security_number\": null, \"invoice_issuer\": \"Audrey Schocpfer\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"26-04-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27439925_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"949309629\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SCHOEPFER Audrey\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chard\", \"beneficiary_dob\": null, \"invoice_date\": \"10-05-2023\", \"security_number\": null, \"invoice_issuer\": \"SCHOEPFER Audrey\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"10-05-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27474258_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"839311982\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Kheira GUERNAN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MONTURLI Audrey\", \"beneficiary_dob\": null, \"invoice_date\": \"18-05-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet GUERNAN-EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique du 18/05\", \"quantity\": 1, \"date_of_service\": \"18-05-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27566548_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"499852184\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mich\\u00e8le BALMES\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIDIL Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"27-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychanalyse\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27586000_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"899302244\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tiph\\u00e8ne de Benque d'Agut\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Valentin Milot\", \"beneficiary_dob\": \"08-06-2017\", \"invoice_date\": \"29-06-2023\", \"security_number\": null, \"invoice_issuer\": \"T. de BENQUE d'AGUT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"29-06-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27601779_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"599324886\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jennifer SHETTLE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LACH Caroline\", \"beneficiary_dob\": null, \"invoice_date\": \"06-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Jennifer SHETTLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"06-07-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27614708_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"089301147\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audrey RENAULT-TAFFIN\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Margaux MANCHE ISSE\", \"beneficiary_dob\": \"09-02-2016\", \"invoice_date\": \"09-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Audrey RENAULT-TAFFIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"08-07-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27614709_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"089301147\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audrey RENAULT-TAFFIN\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Margaux MANCHE ISSE\", \"beneficiary_dob\": \"09-02-2016\", \"invoice_date\": \"25-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Audrey RENAULT-TAFFIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"24-06-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27629340_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SARAH DUFOUR\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Anais BESSIERE\", \"beneficiary_dob\": null, \"invoice_date\": \"18-07-2023\", \"security_number\": null, \"invoice_issuer\": \"SARAH DUFOUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2023/ Consultation psychanalyse ou psychoth\\u00e9rapie / EMDR\", \"quantity\": 1, \"date_of_service\": \"18-07-2023\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27631166_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759378953\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline Bernard\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Basse Ndeye\", \"beneficiary_dob\": null, \"invoice_date\": \"20-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Caroline Bernard\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de th\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"20-07-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27633256_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"069315125\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Andr\\u00e9 Quaderi\", \"total_billed\": 475, \"bill_paid\": true, \"amount_paid\": 475, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Barrier Dorian\", \"beneficiary_dob\": null, \"invoice_date\": \"27-10-2022\", \"security_number\": null, \"invoice_issuer\": \"CENTRE DE PSYCHOTH\\u00c9RAPIE EMDR AZUR\\u00c9EN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"01-09-2022\", \"mandatory_coverage\": null, \"amount\": 95}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"15-09-2022\", \"mandatory_coverage\": null, \"amount\": 95}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"29-09-2022\", \"mandatory_coverage\": null, \"amount\": 95}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"13-10-2022\", \"mandatory_coverage\": null, \"amount\": 95}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"10-10-2022\", \"mandatory_coverage\": null, \"amount\": 95}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27635893_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"539301333\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nathalie HUNEAU\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POINTEAU Cassandra\", \"beneficiary_dob\": null, \"invoice_date\": \"20-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Nathalie HUNEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance psychologique\", \"quantity\": 1, \"date_of_service\": \"20-07-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27638903_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"499852184\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mich\\u00e8le BALMES\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIDIL Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"27-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychanalyse\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27648502_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"131017824\", \"rpps_number\": \"10100832210\", \"finess_number\": null, \"doctor_name\": \"Charlotte DI RUGGIERO\", \"total_billed\": 95, \"bill_paid\": true, \"amount_paid\": 95, \"mandatory_coverage\": 50.2, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COPILLION MATHEO\", \"beneficiary_dob\": null, \"invoice_date\": \"11-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Docteur Charlotte DI RUGGIERO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"psychoth\\u00e9rapie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 50.2, \"amount\": 95}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27674566_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"329302822\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NATHALIE CHAVES\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alexis Lucchini\", \"beneficiary_dob\": null, \"invoice_date\": \"02-08-2023\", \"security_number\": null, \"invoice_issuer\": \"NATHALIE CHAVES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation enfant\", \"quantity\": 1, \"date_of_service\": \"02-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27694376_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"599315900\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Desmaison Jean-Fran\\u00e7ois\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VANDENBREMT Franck\", \"beneficiary_dob\": null, \"invoice_date\": \"11-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Desmaison Jean-Fran\\u00e7ois\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Travail psychoth\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"11-07-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27699749_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"809301278\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Delphine COZETTE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ASSELOT Florienne\", \"beneficiary_dob\": null, \"invoice_date\": \"04-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET DE PSYCHOLOGIE ET DE PSYCHOTHERAPIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychoth\\u00e9rapie / Suivi psychologique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27702894_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"949331268\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Hillary CODUTI\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 60, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VESTA Lucie\", \"beneficiary_dob\": \"07-12-1991\", \"invoice_date\": \"17-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Hillary CODUTI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"17-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27704038_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-27704038_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"229306758\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LISE GAUVIN\", \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Cindy FOURNIVAL\", \"insured_dob\": \"06-06-1944\", \"beneficiary_name\": \"Cindy FOURNIVAL\", \"beneficiary_dob\": \"06-06-1944\", \"invoice_date\": \"26-05-2023\", \"security_number\": null, \"invoice_issuer\": \"EI - LGPSY - LISE GAUVIN PSYCHOLOGUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Adulte - consultation de psychologie suivi - 30 minutes\", \"quantity\": 1, \"date_of_service\": \"26-05-2023\", \"mandatory_coverage\": null, \"amount\": 30.0}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"229306758\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LISE GAUVIN\", \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Cindy FOURNIVAL\", \"insured_dob\": \"06-06-1944\", \"beneficiary_name\": \"Cindy FOURNIVAL\", \"beneficiary_dob\": \"06-06-1944\", \"invoice_date\": \"24-06-2023\", \"security_number\": null, \"invoice_issuer\": \"EI - LGPSY - LISE GAUVIN PSYCHOLOGUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Adulte - consultation de psychologie suivi - 30 minutes\", \"quantity\": 1, \"date_of_service\": \"24-06-2023\", \"mandatory_coverage\": null, \"amount\": 30.0}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27746732_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"750013914\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne Nguyen\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SCHALKENS Patrick\", \"beneficiary_dob\": null, \"invoice_date\": \"06-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Anne Nguyen D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation\", \"quantity\": 1, \"date_of_service\": \"06-09-2023\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27746914_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770007987\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"D\\u00e9bora Torrinha Da Silva\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Henry NERVIL\", \"beneficiary_dob\": null, \"invoice_date\": \"28-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET CHIROPRATIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de Chiropraxie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27749409_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"940012917\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00e9my NATHAN\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LANNAREIX FANNY\", \"beneficiary_dob\": \"27-09-1974\", \"invoice_date\": \"29-08-2023\", \"security_number\": \"2740963113391\", \"invoice_issuer\": \"J\\u00e9r\\u00e9my NATHAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de chiropraxie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27752590_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770010312\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LAURIN JULIE\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CRISCUOLO Isabelle\", \"beneficiary_dob\": \"23-02-1974\", \"invoice_date\": \"02-09-2023\", \"security_number\": null, \"invoice_issuer\": \"LAURIN JULIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Chiropraxie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27752635_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"210001343\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIRES CELIA\", \"total_billed\": 100.0, \"bill_paid\": false, \"amount_paid\": 100.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANELLI C\\u00e9line\", \"beneficiary_dob\": null, \"invoice_date\": \"08-09-2023\", \"security_number\": null, \"invoice_issuer\": \"PIRES CELIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins chiropratiques\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 100.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27756973_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"350008686\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gildas BOUCHERIE\", \"total_billed\": 150, \"bill_paid\": true, \"amount_paid\": 150, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Perret Daniel\", \"beneficiary_dob\": \"23-03-1948\", \"invoice_date\": \"25-04-2023\", \"security_number\": null, \"invoice_issuer\": \"Gildas BOUCHERIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"premi\\u00e8re visite\", \"quantity\": 1, \"date_of_service\": \"11-04-2023\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"suivi\", \"quantity\": 1, \"date_of_service\": \"18-04-2023\", \"mandatory_coverage\": null, \"amount\": 45}, {\"description\": \"suivi\", \"quantity\": 1, \"date_of_service\": \"25-04-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27757098_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"9D0001316\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BEAUDRON\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Puyet Liam\", \"insured_dob\": null, \"beneficiary_name\": \"Puyet Liam\", \"beneficiary_dob\": null, \"invoice_date\": \"20-07-2023\", \"security_number\": \"292089740703655\", \"invoice_issuer\": \"BEAUDRON Chiropractie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"20-07-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27758070_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"130008428\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fran\\u00e7ois-Xavier Fournier\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vanesse Maglett\", \"beneficiary_dob\": null, \"invoice_date\": \"14-12-2022\", \"security_number\": null, \"invoice_issuer\": \"Centre Chiropratique Fournier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s)\", \"quantity\": 1, \"date_of_service\": \"14-12-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27759503_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"600003248\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OUCHART LYDIE\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHALVET Laurie\", \"beneficiary_dob\": \"21-09-1987\", \"invoice_date\": \"29-08-2023\", \"security_number\": null, \"invoice_issuer\": \"OUCHART LYDIE Chiropracteur\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Prestation\", \"quantity\": 1, \"date_of_service\": \"29-08-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27760177_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"590005583\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Valentine Vonck\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NOIRET M\\u00e9lanie\", \"beneficiary_dob\": null, \"invoice_date\": \"13-03-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Chiropratique Valentine VONCK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"13-03-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/rib_cham_chuan-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/photo 2_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0abfd7b9-9456-4b68-a489-bd990ef9a78e_livret_de_famille-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/exemple-facture-infast-1448x2048_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/quittance-de-loyer-exemple_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/exemple-cvec_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/02a9ed8c-54b9-4504-a183-94e17ed6d6ee_facture_osteopathie_Me_RIBERO_Lucie_15_mars_2025_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/02a9ed8c-54b9-4504-a183-94e17ed6d6ee_facture_osteopathie_Me_RIBERO_Lucie_15_mars_2025_1_scale_1.5_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27764742_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"380003772\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabrice GAIME\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CRYSTAL ILMET\", \"beneficiary_dob\": null, \"invoice_date\": \"25-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Fabrice GAIME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins de chiropraxie\", \"quantity\": 1, \"date_of_service\": \"25-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27764864_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"350008744\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sophie L\\u00e9veil\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAUDET Jean Fran\\u00e7ois\", \"beneficiary_dob\": null, \"invoice_date\": \"31-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET DE CHIROPRAXIE Sophie L\\u00e9veil\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de chiropraxie\", \"quantity\": 1, \"date_of_service\": \"31-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27765274_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"640002192\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PHILIPPE PIOT\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOYENECH HERVE\", \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2023\", \"security_number\": \"168116410216487\", \"invoice_issuer\": \"PHILIPPE PIOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"14-09-2023\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27767332_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"110002912\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Samuel BONNEAU\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SANMARTIN Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Samuel BONNEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"visite(s)\", \"quantity\": 1, \"date_of_service\": \"14-09-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771414_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"830010542\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Coralie FRECHET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RENARD Marie No\\u00eblle\", \"beneficiary_dob\": \"18-12-1962\", \"invoice_date\": \"12-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Chiropratique des Maures\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins de chiropraxie\", \"quantity\": 1, \"date_of_service\": \"12-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27773543_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"570000943\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Michel ALT\", \"total_billed\": 45, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RICHARD Huguette\", \"beneficiary_dob\": \"04-10-1940\", \"invoice_date\": \"29-08-2023\", \"security_number\": \"2401016028001\", \"invoice_issuer\": \"Michel ALT D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"29-08-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27775989_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"610000358\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas PARAGOT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Baudoin Sabrina\", \"beneficiary_dob\": null, \"invoice_date\": \"12-09-2023\", \"security_number\": \"283096116907747\", \"invoice_issuer\": \"PARAGOT Nicolas D.C. E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement\", \"quantity\": 1, \"date_of_service\": \"12-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781771_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"140000696\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe FLEURIAU\", \"total_billed\": 55.0, \"bill_paid\": false, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean Jacques JUSTE\", \"beneficiary_dob\": \"27-01-1950\", \"invoice_date\": \"23-05-2023\", \"security_number\": null, \"invoice_issuer\": \"Philippe FLEURIAU, D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite\", \"quantity\": 1, \"date_of_service\": \"23-05-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781772_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"140000696\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe FLEURIAU\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean Jacques JUSTE\", \"beneficiary_dob\": \"27-01-1950\", \"invoice_date\": \"10-05-2023\", \"security_number\": null, \"invoice_issuer\": \"Philippe FLEURIAU, D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite\", \"quantity\": 1, \"date_of_service\": \"10-05-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784861_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"240002568\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Henri VIAL\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Prunis Celine\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": null, \"invoice_issuer\": \"SELARL Centre Chiropratique Vial\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784953_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"620001206\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline FACHON\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Planque Corinne\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"11-09-2023\", \"security_number\": \"254096219302792\", \"invoice_issuer\": \"Caroline FACHON D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"11-09-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26542194_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"311701882\", \"rpps_number\": \"10002859949\", \"finess_number\": \"312622020\", \"doctor_name\": \"DELFOUR-MALECAZE\", \"prescripteur_finess_number\": null, \"total_billed\": 237.13, \"bill_paid\": true, \"amount_paid\": 237.13, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"MAZZARIOL PATRICK\", \"insured_dob\": \"30-09-1948\", \"beneficiary_name\": \"MAZZARIOL PATRICK\", \"beneficiary_dob\": \"30-09-1948\", \"invoice_date\": \"17-08-2022\", \"security_number\": \"148093155543989\", \"invoice_issuer\": \"OPTIQUE LAFAYETTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Menicon - Menicon Z Progressive Mat\\u00e9riau EX - 10.1/7.9 - INDIVIDUELLE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 237.13}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26543127_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26543127_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"prescripteur_finess_number\": \"750066185\", \"total_billed\": 224.0, \"bill_paid\": true, \"amount_paid\": 224.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 224.0, \"remaining_payment\": 0.0, \"insured_name\": \"Xavier ABARE\", \"insured_dob\": \"12-07-1999\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"27-08-2022\", \"security_number\": \"199079710196642\", \"invoice_issuer\": \"Hors Pair\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Bausch & Lomb OPH SOFLENS 59 oeil droit\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 112.0}, {\"description\": \"Bausch & Lomb OPH SOFLENS 59 oeil gauche\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 112.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26543134_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"862601861\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 76.8, \"bill_paid\": true, \"amount_paid\": 76.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"DOLLE Pierre\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"25-08-2022\", \"security_number\": \"1470799352097\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique POITIERS SUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - Iwear fresh (30) Dur\\u00e9e de vie:Journali\\u00e8re (PU:12,80\\u20ac)SoupleCoopervisionBoite De 30 Lentilles\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 38.4}, {\"description\": \"Lentille G - Iwear fresh (30) Dur\\u00e9e de vie:Journali\\u00e8re (PU:12,80\\u20ac)SoupleCoopervisionBoite De 30 Lentilles\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 38.4}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26550397_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"382653921\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 92.0, \"bill_paid\": true, \"amount_paid\": 92.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"MARTEL MICHAEL\", \"insured_dob\": \"08-10-1990\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"17-06-2022\", \"security_number\": \"190107862103354\", \"invoice_issuer\": \"OPTICEO FONTAINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"NOVACEL NOV JOHNSON & JOHNSON ACUVUE OASYS WITH HYDRACLEAR PLUS 12L Diam. : 14,00 Rayon : 8,40\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 46.0}, {\"description\": \"NOVACEL NOV JOHNSON & JOHNSON ACUVUE OASYS WITH HYDRACLEAR PLUS 12L Diam. : 14,00 Rayon : 8,40\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 46.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26550623_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"252621271\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 100.0, \"bill_paid\": true, \"amount_paid\": 100.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"MAGNIN GUILLAUME\", \"insured_dob\": \"17-03-1990\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"10-08-2022\", \"security_number\": \"190032538807873\", \"invoice_issuer\": \"SARL OPTIQUE BALDUCELLI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CooperVision - Biofinity (6) - 14/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"CooperVision - Biofinity Toric (6) - 14.5/8.7 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"PACK RENU PROMO X4\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26553155_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26553155_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131751521\", \"rpps_number\": \"10100085330\", \"finess_number\": \"132657230\", \"doctor_name\": \"Romain COSTE\", \"prescripteur_finess_number\": null, \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"DRAPEAU MATHIEU\", \"insured_dob\": \"29-01-1984\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"17-08-2022\", \"security_number\": \"184011305590921\", \"invoice_issuer\": \"OPTIC'L NEGRESKO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPHTALMIC HR ONE DAY x30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}, {\"description\": \"OPHTALMIC HR ONE DAY x30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26683579_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"592610521\", \"doctor_name\": \"JOURDEL DENIS\", \"prescripteur_finess_number\": \"591875869\", \"total_billed\": 103.0, \"bill_paid\": true, \"amount_paid\": 103.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POTIER Lya\", \"beneficiary_dob\": \"14-04-2004\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"LUNETTERIE DE LINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acuvue Oasys with Hydraclear plus 12L \\u00d814.0 (0.B)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 48.0}, {\"description\": \"Acuvue Oasys with Hydraclear plus 12L \\u00d814.0 (0.B)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 48.0}, {\"description\": \"FRAIS_LIVRAISON - Frais de livraison (1)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 7.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26684583_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132657297\", \"doctor_name\": \"ROUMIEU BRIGITTE\", \"prescripteur_finess_number\": null, \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"PICAVET NATHALIE\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"05-05-2022\", \"security_number\": \"277075917813337\", \"invoice_issuer\": \"GENERALE D'OPTIQUE SARL OPTIC D4\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - Iwear Fit x30 Blanc Dur\\u00e9e de vie Journali\\u00e8re (PU:0.50\\u20ac) Souple Coopervision Boite De 30 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}, {\"description\": \"Lentille G - Iwear Fit x30 Blanc Dur\\u00e9e de vie Journali\\u00e8re (PU:0.50\\u20ac) Souple Coopervision Boite De 30 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26685194_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132657297\", \"doctor_name\": \"ROUMIEU BRIGITTE\", \"prescripteur_finess_number\": null, \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"PICAVET NATHALIE\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2022\", \"security_number\": \"277075917813337\", \"invoice_issuer\": \"GENERALE D'OPTIQUE SARL OPTIC D4\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - Iwear Fit x30 Blanc Dur\\u00e9e de vie Journali\\u00e8re (PU 0.50\\u20ac) Souple Coopervision Boite De 30 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}, {\"description\": \"Lentille G - Iwear Fit x30 Blanc Dur\\u00e9e de vie Journali\\u00e8re (PU 0.50\\u20ac) Souple Coopervision Boite De 30 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26685394_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"062602107\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 162.0, \"bill_paid\": true, \"amount_paid\": 162.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"MISSION Evelyne\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"25-10-2022\", \"security_number\": \"246027507601551\", \"invoice_issuer\": \"Laboratoire PROCONTACT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact \\u00e0 renouvellement fr\\u00e9quent\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 162.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26685750_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 31.5, \"bill_paid\": true, \"amount_paid\": 31.5, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"WIAME-MICHALON MARIE-ROSE\", \"beneficiary_dob\": null, \"invoice_date\": \"24-10-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SofLens daily disposable (90) (Sph\\u00e9rique) Oeil droit Sph\\u00e8re (PWR) : -3.00 (myope) Rayon (BC) : 8.60 Diam\\u00e8tre (DIA) : 14.20\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 31.5}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26687731_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"912606076\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 59.0, \"bill_paid\": true, \"amount_paid\": 59.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"JAAFAR Bassel\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"22-10-2022\", \"security_number\": \"1930542227500\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique VILLEBON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - Iwear Oxygen (6) Dur\\u00e9e de vie:Mensuelle (PU:29,00\\u20ac)SoupleCooperVisionBoite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.0}, {\"description\": \"Lentille G - Iwear Oxygen (6) Dur\\u00e9e de vie:Mensuelle (PU:29,00\\u20ac)SoupleCooperVisionBoite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.0}, {\"description\": \"Iwear Easybalance 3*380 mlCOV\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 1.0}, {\"description\": \"CAPIL FIDELITE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26688197_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752615948\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 95.0, \"bill_paid\": true, \"amount_paid\": 95.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"S\\u00e9bastien Paran\", \"insured_dob\": \"07-06-1979\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"18-09-2022\", \"security_number\": \"179066303202882\", \"invoice_issuer\": \"Optical Center\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPHTALMIC OPHTALMIC HR TORIC Sph\\u00e8re : -4.00 - Rayon (BC) : 8.70 - Diam\\u00e8tre (DIA) : 14.40 - Cylindre : -0.75 - Axe : 10.00 - Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"OPHTALMIC OPHTALMIC HR TORIC Sph\\u00e8re : -3.00 - Rayon (BC) : 8.70 - Diam\\u00e8tre (DIA) : 14.40 - Cylindre : -2.25 - Axe : 10.00 - Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"code promo\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -5.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26689382_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26689382_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"201862760\", \"rpps_number\": \"10003250072\", \"finess_number\": \"202645040\", \"doctor_name\": \"Etienne SEATELLI\", \"prescripteur_finess_number\": null, \"total_billed\": 170.0, \"bill_paid\": true, \"amount_paid\": 170.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"WARRAND NATHALIE\", \"insured_dob\": \"16-04-1961\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"27-04-2022\", \"security_number\": \"261047649829657\", \"invoice_issuer\": \"OPTIQUE DONATI BASTIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon - DAILIES TOTAL 1 90L - 14.1/8.5 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 85.0}, {\"description\": \"Alcon - DAILIES TOTAL 1 90L - 14.1/8.5 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 85.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26691031_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131159840\", \"rpps_number\": null, \"finess_number\": \"132655150\", \"doctor_name\": \"Gabisson Pierre\", \"prescripteur_finess_number\": null, \"total_billed\": 162.0, \"bill_paid\": true, \"amount_paid\": 162.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"FREDERIC GARCIA\", \"insured_dob\": \"03-01-1982\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"02-06-2022\", \"security_number\": \"182011305510253\", \"invoice_issuer\": \"CYL'OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"L.D MYDAY BTE 90 N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 81.0}, {\"description\": \"L.G MYDAY BTE 90 N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 81.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26692355_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"622608347\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 168.0, \"bill_paid\": true, \"amount_paid\": 168.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Bernard ERSKELBOUDT\", \"insured_dob\": \"24-12-1953\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"21-06-2022\", \"security_number\": \"153125936110354\", \"invoice_issuer\": \"OPTIQUE DAVRILLON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"V.D OSV 150 DURCI . 65 GRIS VERT 3\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.0}, {\"description\": \"V.G OSV 150 DURCI . 65 GRIS VERT 3\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.0}, {\"description\": \"MO FIM027 SDL 1 55/17\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26692622_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752613943\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 69.6, \"bill_paid\": true, \"amount_paid\": 69.6, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUILLON Sebastien\", \"beneficiary_dob\": null, \"invoice_date\": \"01-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Grand Optical FORUM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE\\u00ae OASYS\\u00ae 1-Day with HydraLuxe\\u2122 Vente par 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.6}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26692646_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"172602963\", \"doctor_name\": \"Lucie GIRAUD\", \"prescripteur_finess_number\": null, \"total_billed\": 90.0, \"bill_paid\": true, \"amount_paid\": 90.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"VAIE Olivier\", \"insured_dob\": \"20-12-1993\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"31-10-22\", \"security_number\": \"193127904924183\", \"invoice_issuer\": \"Aunis Vision\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Toric (6) \\u00d814.5 (8.7)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}, {\"description\": \"Biofinity Toric (6) \\u00d814.5 (8.7)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26692700_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Damien MACHADO\", \"beneficiary_dob\": null, \"invoice_date\": \"29-10-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE VITA (6) (Sph\\u00e9rique) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.5}, {\"description\": \"ACUVUE VITA (6) (Sph\\u00e9rique) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.5}, {\"description\": \"Emballage r\\u00e9utilisable Hipli\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 1.0}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26694527_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26694527_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131182818\", \"rpps_number\": null, \"finess_number\": \"132606518\", \"doctor_name\": \"GONNET ISABELLE\", \"prescripteur_finess_number\": null, \"total_billed\": 187.0, \"bill_paid\": true, \"amount_paid\": 187.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 187.0, \"remaining_payment\": 0.0, \"insured_name\": \"GROULT CHRISTINE\", \"insured_dob\": \"28-09-1958\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"15-10-2022\", \"security_number\": \"2580913055683\", \"invoice_issuer\": \"ALAIN AFFLELOU OPTIQUE HASBANIAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Multifocal D (6)\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.0}, {\"description\": \"Biofinity Multifocal N (6)\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.0}, {\"description\": \"Produits Lentilles\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.0}, {\"description\": \"ACUVUE PRODUIT LENTILLES REVITALENS 3X360M\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26698706_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 47.8, \"bill_paid\": true, \"amount_paid\": 47.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie LESAGE\", \"beneficiary_dob\": null, \"invoice_date\": \"27-10-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Aqua (6) (Sph\\u00e9rique) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 23.9}, {\"description\": \"Air Optix Aqua (6) (Sph\\u00e9rique) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 23.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.3}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -5.3}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26702327_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752614057\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 48.4, \"bill_paid\": true, \"amount_paid\": 48.4, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"CAUDOUX Xavier\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"26-10-2022\", \"security_number\": \"1710775112196\", \"invoice_issuer\": \"Generale d'Optique PARIS SAINT LAZARE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - ACUVUE\\u00ae OASYS\\u00ae for ASTIGMATISM vente par 6 Dur\\u00e9e de vie:Bi-mensuelle (PU:28,80\\u20ac)SoupleJohnson & JohnsonBoite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 28.8}, {\"description\": \"Lentille G - ACUVUE\\u00ae OASYS\\u00ae with HYDRACLEAR\\u00ae Plus vente par 6 Dur\\u00e9e de vie:Bi-mensuelle (PU:19,60\\u20ac)SoupleJohnson & JohnsonBoite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 19.6}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26705114_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"062603865\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"SANTONI JACQUES\", \"insured_dob\": \"11-08-1979\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"05-11-2022\", \"security_number\": \"179080602908438\", \"invoice_issuer\": \"EUROP'OPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LENTILLE D : BAUSCH+LOMB PUREVISION 2 HD\", \"quantity\": \"1\", \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}, {\"description\": \"LENTILLE G : BAUSCH+LOMB PUREVISION 2 HD\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26705131_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 33.9, \"bill_paid\": true, \"amount_paid\": 33.9, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Catherine MILTON\", \"beneficiary_dob\": null, \"invoice_date\": \"26-10-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE Oasys (12) (Sph\\u00e9rique) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 33.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.0}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -5.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26705567_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"602605792\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 256.0, \"bill_paid\": true, \"amount_paid\": 256.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"CAUPIN Leanne\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"29-10-2022\", \"security_number\": \"202056061233435\", \"invoice_issuer\": \"PROMOPTIC CREPY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson And Johnson (ATO) ACUVUE\\u00ae OASYS for ASTIGMATISM 6P, diam : 14.5, ray : 8.6, souple, Ren: Bimensuel (par 6)\", \"quantity\": 4, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 128.0}, {\"description\": \"Johnson And Johnson (ATO) ACUVUE\\u00ae OASYS for ASTIGMATISM 6P, diam : 14.5, ray : 8.6, souple, Ren: Bimensuel (par 6)\", \"quantity\": 4, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 128.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26705886_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26705886_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"922614250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 36.6, \"bill_paid\": true, \"amount_paid\": 36.6, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Juliette TESSIER\", \"beneficiary_dob\": null, \"invoice_date\": \"01-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Krys\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact Acuvue Oasys Bo\\u00eete de 6 Ethicon Vistakon\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 18.3}, {\"description\": \"Lentilles de contact Acuvue Oasys Bo\\u00eete de 6 Ethicon Vistakon\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 18.3}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26705902_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 39.1, \"bill_paid\": true, \"amount_paid\": 39.1, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ad\\u00e9lie POMMIER\", \"beneficiary_dob\": null, \"invoice_date\": \"16-10-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Plus Hydraglyde (6) (Lentilles sph\\u00e9riques)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 24.9}, {\"description\": \"Menicare Soft 2x360ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.2}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -3.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26709735_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"272695867\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 29.0, \"bill_paid\": true, \"amount_paid\": 29.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"LIBERT FREDERIC\", \"insured_dob\": \"23-12-1982\", \"beneficiary_name\": \"M. LIBERT FREDERIC\", \"beneficiary_dob\": \"23-12-1982\", \"invoice_date\": \"28-10-2022\", \"security_number\": \"182127510908061\", \"invoice_issuer\": \"ECOUTER VOIR - Evreux\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Bausch And Lomb VISAUDIO - Pack BIOTRUE 3*300 ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26709999_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131170425\", \"rpps_number\": null, \"finess_number\": \"132656604\", \"doctor_name\": \"REYMONDET Jean-marc\", \"prescripteur_finess_number\": null, \"total_billed\": 98.0, \"bill_paid\": true, \"amount_paid\": 98.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Chantal MILLION-ROUSSEAU\", \"insured_dob\": \"28-12-1957\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"25-10-2022\", \"security_number\": \"257120103433093\", \"invoice_issuer\": \"Le Collectif des Lunetiers Aix en Provence\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Precilens Laboratoires-25, rue Auguste Perret-FR-94046 CRETEIL C2 X'TRA HIGH BOITE Lentille Souple avec \\u00e9tui pour lentilles de contact. Lentille en Hydrogel de silicone (SILICONE). 6 lentilles par boite Quantit\\u00e9 : 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 98.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26715605_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"781074547\", \"rpps_number\": \"10000749852\", \"finess_number\": \"782665921\", \"doctor_name\": \"PAILLARD\", \"prescripteur_finess_number\": null, \"total_billed\": 128.0, \"bill_paid\": true, \"amount_paid\": 128.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTINEZ MATHIEU\", \"beneficiary_dob\": \"02-05-1994\", \"invoice_date\": \"14-10-2022\", \"security_number\": null, \"invoice_issuer\": \"OPTIC 2000\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CooperVision - MyDay (90) - 14.2/8.4 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 64.0}, {\"description\": \"CooperVision - MyDay (90) - 14.2/8.4 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 64.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26717084_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"782666242\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 47.0, \"bill_paid\": true, \"amount_paid\": 47.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BELLEGUIC Yann\", \"beneficiary_dob\": null, \"invoice_date\": \"26-10-2022\", \"security_number\": null, \"invoice_issuer\": \"VISION D'OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"COV Biofinity Toric (6), diam : 14.5, ray : 8.7, souple, journalier, Ren: Mensuel (par 6)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26719410_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26719410_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"922615380\", \"doctor_name\": \"DOUCET JEAN-FRANCOIS\", \"prescripteur_finess_number\": \"921048138\", \"total_billed\": 272.0, \"bill_paid\": true, \"amount_paid\": 272.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"PINVILLE Viviane\", \"insured_dob\": null, \"beneficiary_name\": \"PINVILLE Viviane\", \"beneficiary_dob\": null, \"invoice_date\": \"04-11-2022\", \"security_number\": \"257129723138968\", \"invoice_issuer\": \"LISSAC ANTONY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"4 ACUVUE\\u00ae OASYS\\u00ae with HYDRACLEA R\\u00ae Plus vente par 6 \\u00d814 (8.4) 6P\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 136.0}, {\"description\": \"4 ACUVUE\\u00ae OASYS\\u00ae with HYDRACLEA R\\u00ae Plus vente par 6 \\u00d814 (8.4) 6P\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 136.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26722562_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"341016145\", \"rpps_number\": null, \"finess_number\": \"342696432\", \"doctor_name\": \"HIJAZI Azzam\", \"prescripteur_finess_number\": null, \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Doriane FOURMY\", \"insured_dob\": \"09-01-1986\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"10-11-2022\", \"security_number\": \"286019403302209\", \"invoice_issuer\": \"EURL OPTIQUE DESMIDT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Coopervision France -1800, rue des Cr\\u00eates- FR-06560 Sophia Antipolis 30 x S KRYS JOURNALIERE 30 Lentille Souple Jetable Port Journalier, Conditionnement: 30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}, {\"description\": \"Coopervision France -1800, rue des Cr\\u00eates- FR-06560 Sophia Antipolis 30 x S KRYS JOURNALIERE 30 Lentille Souple Jetable Port Journalier, Conditionnement: 30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26722801_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752630723\", \"doctor_name\": null, \"prescripteur_finess_number\": \"750060394\", \"total_billed\": 92.0, \"bill_paid\": true, \"amount_paid\": 92.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"NDAME OSCAR\", \"insured_dob\": \"15-11-1991\", \"beneficiary_name\": \"NDAME OSCAR\", \"beneficiary_dob\": \"15-11-1991\", \"invoice_date\": \"19-10-2022\", \"security_number\": \"191119304829653\", \"invoice_issuer\": \"VISION DU CANAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOFLENS 59\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 92.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26722802_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752630723\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 342.0, \"bill_paid\": true, \"amount_paid\": 342.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"DIARRA ISMAEL\", \"insured_dob\": \"17-07-2001\", \"beneficiary_name\": \"DIARRA ISMAEL\", \"beneficiary_dob\": \"17-07-2001\", \"invoice_date\": \"19-12-2021\", \"security_number\": \"101077511960125\", \"invoice_issuer\": \"VISION DU CANAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOFLENS 59\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 171.0}, {\"description\": \"SOFLENS 59\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 171.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26723200_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"770023000\", \"rpps_number\": null, \"finess_number\": \"772301589\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 79.0, \"bill_paid\": true, \"amount_paid\": 79.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"DENISE MARION\", \"insured_dob\": \"17-11-1992\", \"beneficiary_name\": \"DENISE MARION\", \"beneficiary_dob\": null, \"invoice_date\": \"12-11-2022\", \"security_number\": \"292119405230391\", \"invoice_issuer\": \"LISSAC CK CHENAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Coopervision - Biofinity 6L - 14/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 34.0}, {\"description\": \"Coopervision - Biofinity Toric 6L - 14.5/8.7 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26723534_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"641023809\", \"rpps_number\": null, \"finess_number\": \"642695985\", \"doctor_name\": \"heldenberg\", \"prescripteur_finess_number\": null, \"total_billed\": 258.0, \"bill_paid\": true, \"amount_paid\": 258.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 258.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": \"03-12-1982\", \"beneficiary_name\": \"Linda RESSEGUIER\", \"beneficiary_dob\": null, \"invoice_date\": \"29-09-2022\", \"security_number\": null, \"invoice_issuer\": \"EURL SOCOA OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"L.D PUREVISION 2 HD N\\u00b0 LOT 0\", \"quantity\": 6, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 258.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26723851_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 72.0, \"bill_paid\": true, \"amount_paid\": 72.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Melina FERRANTE\", \"beneficiary_dob\": null, \"invoice_date\": \"14-11-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE Oasys 1-Day (90) (Sph\\u00e9rique) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.5}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.5}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -3.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26727954_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 67.5, \"bill_paid\": true, \"amount_paid\": 67.5, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Serge MABRUT\", \"beneficiary_dob\": null, \"invoice_date\": \"10-11-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Freshlook COLORBLENDS (2) (Sph\\u00e9rique) Oeil gauche\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 31.8}, {\"description\": \"Freshlook COLORBLENDS (2) (Sph\\u00e9rique) Oeil droit\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 31.8}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 3.9}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26728671_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26728671_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"771030640\", \"rpps_number\": \"10000680560\", \"finess_number\": \"132605262\", \"doctor_name\": \"Magdalena De SAINT JEAN\", \"prescripteur_finess_number\": null, \"total_billed\": 92.0, \"bill_paid\": true, \"amount_paid\": 92.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"ARNOULT FRANK\", \"insured_dob\": \"22-10-1962\", \"beneficiary_name\": \"ARNOULT FRANK\", \"beneficiary_dob\": \"22-10-1962\", \"invoice_date\": \"09-11-2022\", \"security_number\": \"162103018918114\", \"invoice_issuer\": \"LUYNES OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision - Options Oxy 1-Day Multifocal High (30) - 14.1/8.6 - BOITE DE 30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 46.0}, {\"description\": \"CooperVision - Options Oxy 1-Day Multifocal High (30) - 14.1/8.6 - BOITE DE 30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 46.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26729570_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26729570_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131066425\", \"rpps_number\": \"10003315883\", \"finess_number\": \"132663592\", \"doctor_name\": \"SLAMA\", \"prescripteur_finess_number\": null, \"total_billed\": 100.0, \"bill_paid\": true, \"amount_paid\": 100.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"POURTY DOMINIQUE\", \"insured_dob\": \"09-09-1958\", \"beneficiary_name\": \"POURTY DOMINIQUE\", \"beneficiary_dob\": \"09-09-1958\", \"invoice_date\": \"07-11-2022\", \"security_number\": \"258099380471622\", \"invoice_issuer\": \"L'OPTICIEN AFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Johnson & Johnson - Acuvue\\u00ae OASYS\\u00ae for Presbyopia - 14.3/8.4 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"Johnson & Johnson - Acuvue\\u00ae OASYS\\u00ae for Presbyopia - 14.3/8.4 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26729830_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"842607020\", \"doctor_name\": \"Anais LLORENS\", \"prescripteur_finess_number\": null, \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ACCOSSANO Pauline\", \"beneficiary_dob\": \"19-07-1981\", \"invoice_date\": \"02-11-2022\", \"security_number\": null, \"invoice_issuer\": \"ALAIN AFFLELOU SARL AB OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Eph\\u00e9m\\u00e8re Confort Silicone Hydrogel 30L \\u00d814.0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}, {\"description\": \"Eph\\u00e9m\\u00e8re Confort Silicone Hydrogel 30L \\u00d814.0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26729989_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"451027643\", \"rpps_number\": \"10003780912\", \"finess_number\": \"452670029\", \"doctor_name\": \"SALAMON\", \"prescripteur_finess_number\": null, \"total_billed\": 54.0, \"bill_paid\": true, \"amount_paid\": 54.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"THOISON SYLVIE\", \"insured_dob\": \"19-02-1977\", \"beneficiary_name\": \"KILINAN NAELYA\", \"beneficiary_dob\": \"31-12-2004\", \"invoice_date\": \"10-10-2022\", \"security_number\": \"277024520806189\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Johnson & Johnson Medical SAS - ACUVUE OASYS with HYDRACLEAR\\u00ae Plus 6L - 14/8.4 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.0}, {\"description\": \"Johnson & Johnson Medical SAS - ACUVUE OASYS with HYDRACLEAR\\u00ae Plus 6L - 14/8.4 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26731052_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Damien MACHADO\", \"beneficiary_dob\": null, \"invoice_date\": \"29-10-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE VITA (6) (Sph\\u00e9rique) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.5}, {\"description\": \"ACUVUE VITA (6) (Sph\\u00e9rique) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.5}, {\"description\": \"Emballage r\\u00e9utilisable Hipli\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 1.0}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26732493_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752618662\", \"doctor_name\": \"FISZENSON Olivier\", \"prescripteur_finess_number\": \"921204624\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LIBER clara\", \"beneficiary_dob\": \"21-03-1993\", \"invoice_date\": \"05-04-2021\", \"security_number\": null, \"invoice_issuer\": \"VISION DESIGN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BIOTRUE ONE DAY\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}, {\"description\": \"BIOTRUE ONE DAY\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26732759_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26732759_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 88.08, \"bill_paid\": true, \"amount_paid\": 88.08, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laurence LAFFORGUE\", \"beneficiary_dob\": null, \"invoice_date\": \"31-10-2022\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Multifocal Corrections OEIL DROIT\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 44.04}, {\"description\": \"Biofinity Multifocal Corrections OEIL GAUCHE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 44.04}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26733889_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"662696970\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 64.8, \"bill_paid\": true, \"amount_paid\": 64.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"CARDINAEL PERRINE\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"04-11-2022\", \"security_number\": \"2710959350561\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique PERPIGNAN CLAIRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille G - Jeter (H 30) Dur\\u00e9e de vie Journali\\u00e8re (PU:10,80\\u20ac)SoupleCooperVisionBiofinite De 30 Lentilles\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.4}, {\"description\": \"Lentille D - Jeter (H 30) Dur\\u00e9e de vie Journali\\u00e8re (PU:10,80\\u20ac)SoupleCooperVisionBiofinite De 30 Lentilles\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.4}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26736761_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131051484\", \"rpps_number\": null, \"finess_number\": \"132657446\", \"doctor_name\": \"SFERLAZZO SERGE\", \"prescripteur_finess_number\": null, \"total_billed\": 78.0, \"bill_paid\": true, \"amount_paid\": 78.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"AIDELI SARAH\", \"insured_dob\": \"10-08-1995\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"20-08-2021\", \"security_number\": \"295081315583612\", \"invoice_issuer\": \"USSAC GRAND LITTORAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Hors LPP\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 38.0}, {\"description\": \"Hors LPP\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 38.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Attestation de droits RO-25180485-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/287b78bf-1861-467f-973f-21aad6be26f1_livret_de_famille_Humez_25022024-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/preview-facture-2014-09-10-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 27029579_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/355eee5a-5358-43fd-a0b5-ae7d62406996_livretDeFamille-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Bulletin_de_salaire1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/95918650-771a-430c-9039-1601d46d19df_attestation_securite_sociale__1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/mobilenew2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/facture-engie-page-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26740050_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"380780080\", \"rpps_number\": null, \"finess_number\": \"382685352\", \"doctor_name\": \"CHU DE GRENOBLE Chu\", \"prescripteur_finess_number\": null, \"total_billed\": 23.0, \"bill_paid\": true, \"amount_paid\": 23.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Nathalie GRUBAC\", \"insured_dob\": \"02-09-1972\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"26-07-2022\", \"security_number\": \"272097425601416\", \"invoice_issuer\": \"VERCORS OPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Coopervision France-1800 rtedes Cr\\u00eates-FR-04560 Sophia Antipolis-BIOAIR CONFORT MENSUELLE Lentille Souple, Fr\\u00e9quent, Port Prolong\\u00e9 Conditionnement : 6 Quantit\\u00e9 : 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 23.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26740051_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"382685352\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 18.0, \"bill_paid\": true, \"amount_paid\": 18.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Nathalie GRUBAC\", \"insured_dob\": \"02-09-1972\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"26-07-2022\", \"security_number\": \"272097425601416\", \"invoice_issuer\": \"VERCORS OPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Produit Lentille - Codir Bioair Mult 3X350Ml 3X350 Ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 18.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26740699_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"602604563\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 27.9, \"bill_paid\": true, \"amount_paid\": 27.9, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marc RODRIGUES\", \"beneficiary_dob\": \"28-05-1993\", \"invoice_date\": \"07-11-2022\", \"security_number\": null, \"invoice_issuer\": \"SAS OPTIQUE CACHERA - KRYS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Produit/Lentille - Menicon Europe Kit Voyage Menicare\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 14.0}, {\"description\": \"Produit/Lentille - Menicon Europe Progent SD.A+ SD.B\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 13.9}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26740934_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26740934_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"591103254\", \"rpps_number\": \"10002225133\", \"finess_number\": \"592650642\", \"doctor_name\": \"SMAL\", \"prescripteur_finess_number\": null, \"total_billed\": 88.0, \"bill_paid\": true, \"amount_paid\": 88.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"MOINET HUGO\", \"insured_dob\": \"25-09-1998\", \"beneficiary_name\": \"MOINET HUGO\", \"beneficiary_dob\": \"25-09-1998\", \"invoice_date\": \"29-10-2022\", \"security_number\": \"198095956033043\", \"invoice_issuer\": \"Ecouter Voir Optique Mutualiste Lille\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Ophthalmic VISAUDIO - Ophthalmic HR Spheric (6) - 14.2/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 44.0}, {\"description\": \"Ophthalmic VISAUDIO - Ophthalmic HR Spheric (6) - 14.2/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 44.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26748031_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26748031_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"652600545\", \"doctor_name\": \"MAES-CASTELLARIN Sylvie\", \"prescripteur_finess_number\": \"651016255\", \"total_billed\": 360.0, \"bill_paid\": true, \"amount_paid\": 155.85, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 204.06, \"client_part\": 155.85, \"remaining_payment\": 0.0, \"insured_name\": \"LARROZE LAUGA CAMILLE\", \"insured_dob\": \"27-08-1998\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"02-06-2022\", \"security_number\": \"298086544014808\", \"invoice_issuer\": \"Centre Optique Opti'Soins\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ROUS - ROUS - GRAC - 93 (1)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 60.02, \"amount\": 120.0}, {\"description\": \"OSF - Swiss ORGA 160 B-UV Optiplas Dual+ UV - 75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 72.02, \"amount\": 120.0}, {\"description\": \"OSF - Swiss ORGA 160 B-UV Optiplas Dual+ UV - 75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 72.02, \"amount\": 120.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26749684_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26749684_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"371030412\", \"rpps_number\": \"10002030574\", \"finess_number\": \"372641316\", \"doctor_name\": \"DOMINIQUE LECERF\", \"prescripteur_finess_number\": null, \"total_billed\": 400.0, \"bill_paid\": true, \"amount_paid\": 400.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE MOULLEC Am\\u00e9lie\", \"beneficiary_dob\": null, \"invoice_date\": \"10-11-2022\", \"security_number\": null, \"invoice_issuer\": \"SARL BORDO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PRE DRL PREVENTION XO Violet, diam : 10.2, ray : 7.5, rigide, Journalier, Ren: Annuel (par 1)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 200.0}, {\"description\": \"PRE DRL PREVENTION XO2 Bleu, diam : 10, ray : 7.45, rigide, Journalier, Ren: Annuel (par 1)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 200.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26751453_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"922614250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 24.9, \"bill_paid\": true, \"amount_paid\": 24.9, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Am\\u00e9lie Leterrier\", \"beneficiary_dob\": null, \"invoice_date\": \"22-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Krys\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact Air Optix Plus HydraGlyde Boite de 6 Alcon\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 24.9}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26752245_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 87.0, \"bill_paid\": true, \"amount_paid\": 87.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fatiha AIT-ALY\", \"beneficiary_dob\": null, \"invoice_date\": \"30-08-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Night & Day Aqua (6) (Sph\\u00e9rique) Oeil droit Sph\\u00e8re (PWR) : -3,50 (myope) Rayon (BC) : 8,40 Diam\\u00e8tre (DIA) : 13,80\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 43.5}, {\"description\": \"Air Optix Night & Day Aqua (6) (Sph\\u00e9rique) Oeil gauche Sph\\u00e8re (PWR) : -4,75 (myope) Rayon (BC) : 8,40 Diam\\u00e8tre (DIA) : 13,80\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 43.5}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.3}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -5.3}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26760297_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 64.04, \"bill_paid\": true, \"amount_paid\": 64.04, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COUPEZ R\\u00e9gis\", \"beneficiary_dob\": null, \"invoice_date\": \"08-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Optik Kaufman SA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BIOFTO-DIA14.5-BC8.7-PWRM0.5-CYLM1.75-AX10\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.02}, {\"description\": \"BIOFTO6-DIA14.5-BC8.7-PWR0-CYLM2.25-AX170\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.02}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26761307_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"782664361\", \"doctor_name\": \"ALEXANDRE Sophie\", \"prescripteur_finess_number\": \"921106290\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"GUILLERET Nicolas\", \"insured_dob\": \"18-03-1985\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"20-10-2022\", \"security_number\": \"185039501809604\", \"invoice_issuer\": \"Optical Discount\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SoftLens toric \\u00d814.5 (8.5)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"SoftLens toric \\u00d814.5 (8.5)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26762901_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752631044\", \"doctor_name\": \"TOUMY ALAIN Toumy Alain\", \"prescripteur_finess_number\": \"750062101\", \"total_billed\": 350.0, \"bill_paid\": true, \"amount_paid\": 350.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"SECK Kadija\", \"insured_dob\": \"08-11-2006\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"21-11-2022\", \"security_number\": \"273059932611336\", \"invoice_issuer\": \"bhs optique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BIOTRUE ONEDAY BOITE DE 90 \\u00d814,2 (8,6)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 175.0}, {\"description\": \"BIOTRUE ONEDAY BOITE DE 90 \\u00d814,2 (8,6)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 175.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26764274_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131051484\", \"rpps_number\": null, \"finess_number\": \"132657446\", \"doctor_name\": \"SFERLAZZO SERGE\", \"prescripteur_finess_number\": null, \"total_billed\": 78.0, \"bill_paid\": true, \"amount_paid\": 78.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"AIDELI SARAH\", \"insured_dob\": \"10-08-1995\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"20-08-2021\", \"security_number\": \"295081315583612\", \"invoice_issuer\": \"USSAC GRAND LITTORAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Hors LPP\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 38.0}, {\"description\": \"Hors LPP\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 38.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26765337_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26765337_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617002\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 317.17, \"bill_paid\": true, \"amount_paid\": 317.17, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Annie Jean-Pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"23-11-2022\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil droit\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.8}, {\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil droit\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.8}, {\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil gauche\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.8}, {\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil gauche\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.8}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.0}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -7.03}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26771600_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"112697446\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 220.0, \"bill_paid\": true, \"amount_paid\": 220.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"ANKRI VERONIQUE\", \"insured_dob\": \"27-12-1960\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"19-11-2022\", \"security_number\": \"260127511539611\", \"invoice_issuer\": \"EXPERT VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PRECILENS - PRECILENS DRLI Diam. : 14,20 Rayon : 8,60\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 220.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26771865_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"781095609\", \"rpps_number\": \"10000777952\", \"finess_number\": \"782652341\", \"doctor_name\": \"ROZENBAUM\", \"prescripteur_finess_number\": null, \"total_billed\": 180.0, \"bill_paid\": true, \"amount_paid\": 180.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"GARCON JEAN LOUIS\", \"insured_dob\": \"12-06-1979\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"24-06-2022\", \"security_number\": null, \"invoice_issuer\": \"ALAIN AFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Coopervision - Eph\\u00e9m\\u00e8re Confort Silicone Hydrogel 30L - 14/8.6 - BOITE DE 30\", \"quantity\": 6, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Coopervision - Eph\\u00e9m\\u00e8re Confort Silicone Hydrogel 30L - 14/8.6 - BOITE DE 30\", \"quantity\": 6, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26772418_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26772418_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"prescripteur_finess_number\": null, \"total_billed\": 64.34, \"bill_paid\": true, \"amount_paid\": 64.34, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"Maeva ASSOUAN\", \"insured_dob\": null, \"beneficiary_name\": \"Maeva ASSOUAN\", \"beneficiary_dob\": null, \"invoice_date\": \"20-09-2022\", \"security_number\": \"282022923208418\", \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SoftLens For Astigmatism (6) (Torique) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 23.67}, {\"description\": \"SoftLens For Astigmatism (6) (Torique) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 23.67}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26772554_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"010616021\", \"doctor_name\": \"RAMEL JEAN-CHRISTOPHE\", \"prescripteur_finess_number\": null, \"total_billed\": 217.51, \"bill_paid\": true, \"amount_paid\": 90.0, \"mandatory_coverage\": 127.51, \"complementary_coverage\": null, \"client_part\": 90.0, \"remaining_payment\": 0, \"insured_name\": \"LEGRAND RENE\", \"insured_dob\": \"07-06-1954\", \"beneficiary_name\": \"LEGRAND RENE\", \"beneficiary_dob\": \"07-06-1954\", \"invoice_date\": \"14-11-2022\", \"security_number\": \"154061436400961\", \"invoice_issuer\": \"RAMEL JEAN-CHRISTOPHE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGNP004 Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"14-11-2022\", \"mandatory_coverage\": null, \"amount\": 192.19}, {\"description\": \"BGQP002 Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"14-11-2022\", \"mandatory_coverage\": null, \"amount\": 25.32}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26772555_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RAMEL JEAN-CHRISTOPHE\", \"prescripteur_finess_number\": null, \"total_billed\": 122.03, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": 62.03, \"complementary_coverage\": null, \"client_part\": 60.0, \"remaining_payment\": 0.0, \"insured_name\": \"LEGRAND RENE\", \"insured_dob\": \"07-06-1954\", \"beneficiary_name\": \"LEGRAND RENE\", \"beneficiary_dob\": \"07-06-1954\", \"invoice_date\": \"10-11-2022\", \"security_number\": \"154061436400961\", \"invoice_issuer\": \"RAMEL JEAN-CHRISTOPHE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"10-11-2022\", \"mandatory_coverage\": null, \"amount\": 101.62}, {\"description\": \"Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"10-11-2022\", \"mandatory_coverage\": null, \"amount\": 20.41}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26772899_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"762652352\", \"doctor_name\": \"ETIENNE LETESSON\", \"prescripteur_finess_number\": null, \"total_billed\": 74.0, \"bill_paid\": true, \"amount_paid\": 74.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROUVIERE RENAUD\", \"beneficiary_dob\": null, \"invoice_date\": \"24-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Grand Optical ROUEN CENTRE VILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - 1-DAY ACUVUE\\u00ae MOIST\\u00ae for ASTIGMATISM vente par 90 Dur\\u00e9e de vie journali\\u00e8re (DLJ) 64,00\\u20ac/Scple Johnson & Johnson/Boite De 90 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 64.0}, {\"description\": \"MA BOX LENTILLES\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 10.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26774610_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/DDE prestation Facture des lentilles de contact-26774610_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"061073920\", \"rpps_number\": \"10003293268\", \"finess_number\": \"062604756\", \"doctor_name\": \"NAKACHE PATRICK\", \"prescripteur_finess_number\": null, \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 160.0, \"remaining_payment\": 0.0, \"insured_name\": \"CALVINO Robert\", \"insured_dob\": \"09-04-1962\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2022\", \"security_number\": \"162040608810286\", \"invoice_issuer\": \"Optique GAMBETTA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"L.D BIOTRUE N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 80.0}, {\"description\": \"L.G BIOTRUE N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 80.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27450096_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"427010756\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LOMBARDI Didier\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Ducret Patrice\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"15-05-2023\", \"security_number\": null, \"invoice_issuer\": \"MONSIEUR LOMBARDI DIDIER OSTEOPATHE DO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"une s\\u00e9ance dans mon cabinet\", \"quantity\": 1, \"date_of_service\": \"15-05-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27473497_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870000239\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Paul HAUDRY\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"AGUILLON Agnes\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"24-05-2023\", \"security_number\": \"2521224322092 45\", \"invoice_issuer\": \"Paul HAUDRY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27516402_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"480000587\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maeva JONARD\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PASQUALINI Vincent\", \"beneficiary_dob\": null, \"invoice_date\": \"03-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Maeva JONARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-06-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27543561_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27543561_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910003128\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Johanna LOYER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LUTROT AMELIE\", \"beneficiary_dob\": \"01-04-1992\", \"invoice_date\": \"19-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Johanna LOYER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-06-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27564694_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"947484846\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Herv\\u00e9 De Lellis\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOKEL Nurullah\", \"beneficiary_dob\": \"09-05-1972\", \"invoice_date\": \"23-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Herv\\u00e9 De Lellis\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-06-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27575321_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440009363\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ANGIN Paul\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"KEFIFA GISELE\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"19-06-2023\", \"security_number\": \"150124900708452\", \"invoice_issuer\": \"ANGIN Paul\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'Ost\\u00e9opathie et M\\u00e9decine Chinoise\", \"quantity\": 1, \"date_of_service\": \"19-06-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27591022_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"947484846\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Herv\\u00e9 De Lellis\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOKEL Nurullah\", \"beneficiary_dob\": \"09-05-1972\", \"invoice_date\": \"23-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Herv\\u00e9 De Lellis\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-06-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27596405_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940012883\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emma GOUMET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JOURDAN Elodie\", \"beneficiary_dob\": \"19-10-1985\", \"invoice_date\": \"16-05-2023\", \"security_number\": null, \"invoice_issuer\": \"Madame Emma GOUMET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-05-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27596586_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760003483\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Simon FAMMERY\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"CLERGERIE Patricia\", \"insured_dob\": null, \"beneficiary_name\": \"CLERGERIE Patricia\", \"beneficiary_dob\": null, \"invoice_date\": \"04-07-2023\", \"security_number\": \"266097638436443\", \"invoice_issuer\": \"Simon FAMMERY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie effectu\\u00e9e ce jour\", \"quantity\": 1, \"date_of_service\": \"04-07-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27601827_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620000877\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emmanuel LACROIX\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"JEAN PIERRE NATON\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"04-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Emmanuel LACROIX\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27604021_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650000342\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe Hournarette\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHARTRAIN Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Philippe Hournarette\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27608383_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130002744\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien Conchin\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Benali Silv\\u00e8re\", \"beneficiary_dob\": null, \"invoice_date\": \"30-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Julien Conchin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une visite + soins\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27619845_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660004425\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florine LEHEU\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RUIZ Emmanuelle\", \"beneficiary_dob\": null, \"invoice_date\": \"11-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Florine LEHEU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27622860_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"840000517\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jo\\u00eblle Roche\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PACE LAPIERE CHRISTEL\", \"beneficiary_dob\": null, \"invoice_date\": \"21-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Jo\\u00eblle Roche\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-06-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27633719_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"317048213\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIAN GEFFROY\", \"total_billed\": 63, \"bill_paid\": true, \"amount_paid\": 63, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Leroux Jane\", \"beneficiary_dob\": \"05-06-1938\", \"invoice_date\": \"06-07-2023\", \"security_number\": null, \"invoice_issuer\": \"E.I. FLORIAN GEFFROY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 63}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27640153_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"377015698\", \"rpps_number\": \"10005916282\", \"doctor_name\": \"Adrien Deniau\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"insured_name\": \"Frideloux Amandine\", \"insured_dob\": \"20-08-1991\", \"beneficiary_name\": \"Frideloux Amandine\", \"beneficiary_dob\": \"20-08-1991\", \"invoice_date\": \"15-07-2023\", \"security_number\": \"291083705034993\", \"invoice_issuer\": \"Adrien Deniau\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"date_of_service\": \"15-07-2023\", \"amount\": 60}, {\"description\": \"Majoration Dimanche et Jours F\\u00e9ri\\u00e9s\", \"date_of_service\": \"15-07-2023\", \"amount\": 0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27641286_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"2A0000386\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christopher Huguet-Campana\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Scurbirdri Mvuille\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"05-07-2023\", \"security_number\": \"229089935407085\", \"invoice_issuer\": \"Christopher Huguet-Campana\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-07-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27647591_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"9D7023800\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GRANDCAMP J\\u00e9r\\u00e9my\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LARATTE Patrick\", \"beneficiary_dob\": null, \"invoice_date\": \"10-07-2023\", \"security_number\": null, \"invoice_issuer\": \"GRANDCAMP J\\u00e9r\\u00e9my\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-07-2023\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27650984_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950002717\", \"rpps_number\": \"10001320240\", \"finess_number\": null, \"doctor_name\": \"Jean L\\u00ca\", \"total_billed\": 115, \"bill_paid\": true, \"amount_paid\": 115, \"mandatory_coverage\": 55, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"Desjardins Jean-Luc\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"24-07-2023\", \"security_number\": \"1490180021112\", \"invoice_issuer\": \"Docteur Jean L\\u00ca\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation M\\u00e9dicale\", \"quantity\": 1, \"date_of_service\": \"24-07-2023\", \"mandatory_coverage\": 55, \"amount\": 55}, {\"description\": \"Soins d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-07-2023\", \"mandatory_coverage\": 0, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27661590_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910003649\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Hugo MONFORT\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AMBAS Laetitia\", \"beneficiary_dob\": null, \"invoice_date\": \"01-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Hugo MONFORT Ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27667405_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HILBOLD Nicole\", \"beneficiary_dob\": \"15-07-1948\", \"invoice_date\": \"01-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27688568_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950007849\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine Gouffaud\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"De Sousa Dias Bruno\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Marine Gouffaud\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie acquitt\\u00e9e\", \"quantity\": 1, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27689832_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770010700\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Myriam GULRAIZ\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"RAOUT Ethan\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": \"02-07-2012\", \"invoice_date\": \"14-08-2023\", \"security_number\": \"285119304805058\", \"invoice_issuer\": \"Myriam GULRAIZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27690522_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330017385\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9ment FERME\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOISTEL Fanny\", \"beneficiary_dob\": \"18-03-1994\", \"invoice_date\": \"09-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9ment FERME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27691255_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Denis Arlabosse\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Patrick Gardes\", \"beneficiary_dob\": null, \"invoice_date\": \"10-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Denis Arlabosse\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance en Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27694953_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"29 0000769\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Virginie DEFENDI\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Morgane AUDUSSEAU\", \"insured_dob\": \"18-11-1989\", \"beneficiary_name\": \"Morgane AUDUSSEAU\", \"beneficiary_dob\": \"18-11-1989\", \"invoice_date\": \"07-08-2023\", \"security_number\": \"2 89 11 49 007 251 42\", \"invoice_issuer\": \"Virginie DEFENDI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27698460_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690021555\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laur\\u00e8ne PAUCOD\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HERUAIS Fanny\", \"beneficiary_dob\": null, \"invoice_date\": \"18-08-2023\", \"security_number\": null, \"invoice_issuer\": \"AMANDINE RESTIF\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathic treatment\", \"quantity\": null, \"date_of_service\": \"18-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27698461_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920014909\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Paul Hurtaud\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GURWICZ Caroline\", \"beneficiary_dob\": null, \"invoice_date\": \"18-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Paul Hurtaud - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27698658_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"840000509\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RIVET Benoit\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Khadhaouria Makram\", \"beneficiary_dob\": null, \"invoice_date\": \"19-08-2023\", \"security_number\": null, \"invoice_issuer\": \"RIVET Benoit E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en Ost\\u00e9opathie en esp\\u00e8ces\", \"quantity\": 1, \"date_of_service\": \"19-08-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27699394_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770008167\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yoann Vitone\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"02-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Yoann Vitone Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"02-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27699506_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060011608\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Damien Cuche\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DOSSMANN Nathan\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Damien Cuche\", \"currency\": \"EUR\", \"items\": [{\"description\": \"11-08-2023 - S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27699508_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060011608\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Damien Cuche\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DOSSMANN Romain\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Damien Cuche\", \"currency\": \"EUR\", \"items\": [{\"description\": \"11-08-2023 - S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27699922_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440010874\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TABUTEAU Mathieu\", \"total_billed\": 62, \"bill_paid\": true, \"amount_paid\": 62, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"El Jamri Rizlen\", \"beneficiary_dob\": \"10-05-1980\", \"invoice_date\": \"17-08-2023\", \"security_number\": null, \"invoice_issuer\": \"TABUTEAU Mathieu\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"17-08-2023\", \"mandatory_coverage\": null, \"amount\": 62}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27700785_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780014270\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9cile Champion\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"RUPPERT Bruno\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": \"1 66 01 75 114 365 40\", \"invoice_issuer\": \"C\\u00e9cile Champion (El)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701053_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660002072\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Busca Martin\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": 0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 50, \"insured_name\": \"MANDOLE Gilberte\", \"insured_dob\": null, \"beneficiary_name\": \"MANDOLE Gilberte\", \"beneficiary_dob\": null, \"invoice_date\": \"09-08-2023\", \"security_number\": \"2 38 05 75 024 058 68\", \"invoice_issuer\": \"Busca Martin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701057_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950008763\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE Amandine\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUROUX Pascal\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2023\", \"security_number\": null, \"invoice_issuer\": \"LE Amandine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701064_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400001475\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas DEYTS\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LABORDE Cyril\", \"insured_dob\": \"12-11-1994\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"10-08-2023\", \"security_number\": \"194114019226951\", \"invoice_issuer\": \"Nicolas DEYTS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"une consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"10-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701076_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130011679\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille LENTREIN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Patrice PEREZ\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"09-08-2023\", \"security_number\": \"182109102705446\", \"invoice_issuer\": \"Camille LENTREIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701083_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130018724\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Natacha SOUTERNON\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"G\\u00e9rard REYNES\", \"insured_dob\": \"12-06-1960\", \"beneficiary_name\": \"G\\u00e9rard REYNES\", \"beneficiary_dob\": \"12-06-1960\", \"invoice_date\": \"12-07-2023\", \"security_number\": \"1600681060115\", \"invoice_issuer\": \"Natacha SOUTERNON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-07-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701162_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"M.EYMARD Tristan\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"1880169256020\", \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701163_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330015991\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Am\\u00e9lie Reboul\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUNARD CRISTON\", \"beneficiary_dob\": \"12-01-1988\", \"invoice_date\": \"23-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Am\\u00e9lie Reboul\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-06-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701181_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"927014985\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Olivier RABACHE\", \"total_billed\": 63, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BADYNSKI Colette\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE Olivier RABACHE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d' ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-08-2023\", \"mandatory_coverage\": null, \"amount\": 63}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701197_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660000746\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PENCHET Jean-Christophe\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"RIMBAUD Josette\", \"insured_dob\": null, \"beneficiary_name\": \"RIMBAUD Josette\", \"beneficiary_dob\": null, \"invoice_date\": \"10-08-2023\", \"security_number\": \"239026419100168\", \"invoice_issuer\": \"Mr PENCHET Jean-Christophe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701213_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"560007098\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille LE BOURHIS\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FOSSE Gr\\u00e9gory\", \"insured_dob\": \"15-11-1976\", \"beneficiary_name\": \"FOSSE Gr\\u00e9gory\", \"beneficiary_dob\": \"15-11-1976\", \"invoice_date\": \"02-08-23\", \"security_number\": \"176 11 62 178 023 35\", \"invoice_issuer\": \"LE BOURHIS Camille\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"02-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701301_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810000729\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HUGUET Arnaud\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Mast Jean Claude\", \"insured_dob\": null, \"beneficiary_name\": \"Mast Jean Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"12-08-2023\", \"security_number\": \"178 0862 119 056 74\", \"invoice_issuer\": \"Mr HUGUET Arnaud\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Traitement Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"12-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701303_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"270002371\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Madenne GILLON\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEDUEY Myriam\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Madenne GILLON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701314_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"850002247\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Karen BARBARIT\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Prouheze Juliette\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"10-08-2023\", \"security_number\": \"235017511480106\", \"invoice_issuer\": \"Karen BARBARIT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": null, \"date_of_service\": \"10-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701350_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660002072\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Busca Martin\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Malvezy Michele\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"08-08-2023\", \"security_number\": \"258026613600748\", \"invoice_issuer\": \"Busca Martin\", \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701380_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"260000294\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire BETRIU\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Daphn\\u00e9 WEBER\", \"beneficiary_dob\": \"03-03-1975\", \"invoice_date\": \"27-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Claire BETRIU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"27-06-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701660_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620003541\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Antoine DELELIS\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROBILLARD Mathieu\", \"beneficiary_dob\": \"26-03-1997\", \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Antoine DELELIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701669_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"250000049\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Philippe SIRON\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MONNOT Pierre\", \"beneficiary_dob\": \"26-12-1948\", \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Jean-Philippe SIRON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701675_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330002791\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9cile MONZON\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"PASSICOS Bernard\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": \"1420640188003\", \"invoice_issuer\": \"C\\u00e9cile MONZON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701677_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770004018\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manotte Na\\u00eblla\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MESSAOUDI Shirine\", \"beneficiary_dob\": \"05-12-1988\", \"invoice_date\": \"16-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Manotte Na\\u00eblla\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701692_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670003391\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-No\\u00ebl JEDELE\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"KEHREN MURIEL\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"04-08-2023\", \"security_number\": \"281116718003122\", \"invoice_issuer\": \"Jean-No\\u00ebl JEDELE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-08-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701698_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310013073\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline FONDEUR\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"STATTNER Karim\", \"insured_dob\": \"31-10-1982\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"19-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Pauline FONDEUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-06-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701700_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630003986\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Henri PELLETIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"BURLAND Lionel\", \"insured_dob\": \"04-06-1980\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Henri PELLETIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"16-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701825_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210001079\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas GARDES\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"HALGRIN S\\u00e9bastien\", \"insured_dob\": null, \"beneficiary_name\": \"HALGRIN S\\u00e9bastien\", \"beneficiary_dob\": null, \"invoice_date\": \"11-07-2023\", \"security_number\": \"1810793045031 58\", \"invoice_issuer\": \"Thomas GARDES Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-07-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701844_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780000493\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eva Araujo-Branellec\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"DUBOIS Fabrice\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": \"173085918312956\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701846_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670005065\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ilona PRUD'HOMME\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAROCH Maxime\", \"beneficiary_dob\": null, \"invoice_date\": \"03-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Ilona PRUD'HOMME Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701854_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701854_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770008894\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pires Julie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alexia CHOMETTE\", \"beneficiary_dob\": \"02-11-1997\", \"invoice_date\": \"30-05-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-05-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770008894\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julie Pires\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alexia CHOMETTE\", \"beneficiary_dob\": \"02-11-1997\", \"invoice_date\": \"17-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-06-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701855_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000420\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"REMERY Claire\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAURENCON G\\u00e9rald\", \"beneficiary_dob\": \"28/09/2003\", \"invoice_date\": \"15/06/2023\", \"security_number\": null, \"invoice_issuer\": \"REMERY Claire\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie pour LAURENCON G\\u00e9rald le 15/06/2023 en Carte bancaire\", \"quantity\": 1, \"date_of_service\": \"15-06-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701865_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910000124\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexandra Pillosio\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MIGAYROU Paul\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Alexandra Pillosio\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701901_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"420002305\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Adeline SERALINI\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Michallet Charl\\u00e8re\", \"beneficiary_dob\": null, \"invoice_date\": \"04-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Adeline SERALINI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-07-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701911_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950008235\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Margaux LEMARCHAND\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DUMOULIN Ang\\u00e9lique\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"19-08-2023\", \"security_number\": \"294015430421537\", \"invoice_issuer\": \"Margaux LEMARCHAND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701935_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770005031\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LAFAYE Floris\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bamely Julia\", \"beneficiary_dob\": \"25-07-2023\", \"invoice_date\": \"09-08-2023\", \"security_number\": \"223077713125392\", \"invoice_issuer\": \"LAFAYE Floris\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"09-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701986_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"840000547\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anissa LARABI\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PELFIER PEREZ\", \"beneficiary_dob\": null, \"invoice_date\": \"07-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Anissa LARABI Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701988_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780000642\", \"doctor_name\": \"Jacques HALLUIN\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"beneficiary_name\": \"ALERTE Jo\\u00ebl\", \"beneficiary_dob\": null, \"invoice_date\": \"12-08-2023\", \"invoice_issuer\": \"Jacques HALLUIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance\", \"amount\": 45}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27701997_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780006920\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"David Sanchez\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LIPPERT Patricia\", \"beneficiary_dob\": \"29-08-1964\", \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"David Sanchez Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702201_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"450003033\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"AUCLAIR Lucie\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Larras Clous\", \"beneficiary_dob\": null, \"invoice_date\": \"12-07-2023\", \"security_number\": null, \"invoice_issuer\": \"AUCLAIR Lucie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-07-2023\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702202_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"450003033\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"AUCLAIR Lucie\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LARRAS Clous\", \"beneficiary_dob\": null, \"invoice_date\": \"14-08-2023\", \"security_number\": null, \"invoice_issuer\": \"AUCLAIR Lucie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-08-2023\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702547_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570001560\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PAULY In\\u00e8s El\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"ADREANI C\\u00e9line\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"05-06-2023\", \"security_number\": \"274035746303069\", \"invoice_issuer\": \"Olivier DUPARCQ El In\\u00e8s PAULY El OSTEOPATHES D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - s\\u00e9ance de ADREANI C\\u00e9line le 05/06/2023\", \"quantity\": 1, \"date_of_service\": \"05-06-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702698_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750001497\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"EI TRISTAN COSTA\", \"total_billed\": 85, \"bill_paid\": true, \"amount_paid\": 85, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PATRICE DECROZE\", \"beneficiary_dob\": null, \"invoice_date\": \"07-08-2023\", \"security_number\": null, \"invoice_issuer\": \"EI TRISTAN COSTA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOINS EFFECTUES\", \"quantity\": null, \"date_of_service\": \"07-08-2023\", \"mandatory_coverage\": null, \"amount\": 85}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702699_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750001497\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"EI TRISTAN COSTA\", \"total_billed\": 85, \"bill_paid\": true, \"amount_paid\": 85, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PATRICE DECROZE\", \"beneficiary_dob\": null, \"invoice_date\": \"14-08-2023\", \"security_number\": null, \"invoice_issuer\": \"EI TRISTAN COSTA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOINS EFFECTUES CE JOUR\", \"quantity\": null, \"date_of_service\": \"14-08-2023\", \"mandatory_coverage\": null, \"amount\": 85}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702756_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620000471\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabien KRUPA\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FOVEAU C\\u00e9dric\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Fabien KRUPA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702777_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950009530\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZEBLI MOHAMAD KOULOUD\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 50, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yezekael VIGNANE\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"ZEBLI MOHAMAD KOULOUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702790_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350008629\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Josepha Le Pris\\u00e9\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUMAS BARBIER Rapha\\u00eblle\", \"beneficiary_dob\": \"06-07-1982\", \"invoice_date\": \"22-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Coudray Cauchy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702792_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350009999\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aude Moutel\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"BERTHIER Alexandra\", \"insured_dob\": \"22-07-1996\", \"beneficiary_name\": \"BERTHIER Alexandra\", \"beneficiary_dob\": \"22-07-1996\", \"invoice_date\": \"18-08-2023\", \"security_number\": \"296073523884555\", \"invoice_issuer\": \"Aude Moutel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702811_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"180000697\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elodie BRIAND\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GERMAIN Nathalie\", \"beneficiary_dob\": null, \"invoice_date\": \"08-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Elodie BRIAND Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27702876_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"250003274\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BLANC Laurene\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JAILLET Pierre-Philippe\", \"beneficiary_dob\": null, \"invoice_date\": \"12-08-2023\", \"security_number\": null, \"invoice_issuer\": \"BLANC Laurene\", \"currency\": \"EUR\", \"items\": [{\"description\": \"NOTE D'HONORAIRES\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703086_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"450003629\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BOUCHTANE Ryan\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DALLEAU Josita\", \"beneficiary_dob\": null, \"invoice_date\": \"26-07-2023\", \"security_number\": null, \"invoice_issuer\": \"BOUCHTANE Ryan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703114_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330008954\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TOSON St\\u00e9phane\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ORSONI Lorenzo\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Monsieur TOSON St\\u00e9phane\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703118_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"877004333\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean Pierre LAMBERT\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LHOMME Pierre Jean\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"17-08-2023\", \"security_number\": \"172098708509610\", \"invoice_issuer\": \"Monsieur Jean Pierre LAMBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703127_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630002004\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OMERIN Thomas\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivier LONDIN\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": null, \"invoice_issuer\": \"OMERIN Thomas\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION OSTEOPATHIQUE\", \"quantity\": 1, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703138_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220001796\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Perrine OWEIDAT\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"St\\u00e9phan Iwen\", \"beneficiary_dob\": null, \"invoice_date\": \"22-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Perrine OWEIDAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703139_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220001796\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Perrine OWEIDAT\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Stephan Lydia\", \"beneficiary_dob\": null, \"invoice_date\": \"04-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Perrine OWEIDAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-07-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703140_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220001796\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Perrine OWEIDAT\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lydia Stephan\", \"beneficiary_dob\": null, \"invoice_date\": \"21-03-2023\", \"security_number\": null, \"invoice_issuer\": \"Perrine OWEIDAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-03-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703141_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220001796\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Perrine OWEIDAT\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maina Stephan\", \"beneficiary_dob\": null, \"invoice_date\": \"05-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Perrine OWEIDAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-07-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703142_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220001796\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Perrine OWEIDAT\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maina Stiphan\", \"beneficiary_dob\": null, \"invoice_date\": \"21-02-2023\", \"security_number\": null, \"invoice_issuer\": \"Perrine OWEIDAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-02-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703145_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"820000032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pascal PESSELON\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"MALLEVIALLE Catherine\", \"insured_dob\": null, \"beneficiary_name\": \"MALLEVIALLE Catherine\", \"beneficiary_dob\": null, \"invoice_date\": \"20-07-2023\", \"security_number\": \"24909368829937\", \"invoice_issuer\": \"Pascal PESSELON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"20-07-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703149_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910001734\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas PINSON\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DELAERE Pascal\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"07-08-2023\", \"security_number\": \"1540578607051\", \"invoice_issuer\": \"NICOLAS PINSON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703160_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"720002583\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Le Borgne Guillaume\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NORMAND Kevin\", \"beneficiary_dob\": \"24-11-1995\", \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Le Borgne Guillaume\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703164_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130018765\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benoit AYVAZIAN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MILLE Jessica\", \"beneficiary_dob\": \"16-07-2000\", \"invoice_date\": \"22-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Benoit AYVAZIAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703393_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330013012\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laura LOISON\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FREMONT Ren\\u00e9-Alain\", \"beneficiary_dob\": \"03-05-1951\", \"invoice_date\": \"17-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Laura LOISON EI Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703581_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950003194\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"IQBAL Asad\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FLERIN Claudine\", \"insured_dob\": \"02-12-1948\", \"beneficiary_name\": \"PLERIN Claudine\", \"beneficiary_dob\": \"02-12-1948\", \"invoice_date\": \"14-08-2023\", \"security_number\": \"248129721100142\", \"invoice_issuer\": \"IQBAL Asad\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703582_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950003194\", \"doctor_name\": \"IQBAL Asad\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FLERIN Claudine\", \"insured_dob\": \"02-12-1948\", \"beneficiary_name\": \"FLERIN Claudine\", \"beneficiary_dob\": \"02-12-1948\", \"invoice_date\": \"08-08-2023\", \"security_number\": \"248129721100142\", \"invoice_issuer\": \"IQBAL Asad\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27703931_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770009017\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUYOT Charl\\u00e8ne\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHARLES Alexandra\", \"beneficiary_dob\": \"01-10-1991\", \"invoice_date\": \"17-08-2023\", \"security_number\": null, \"invoice_issuer\": \"GUYOT Charl\\u00e8ne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27704420_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003970\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"In\\u00e8s Frioui\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Periquet Jordane\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2023\", \"security_number\": null, \"invoice_issuer\": \"FRIOUI In\\u00e8s\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"16-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705292_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"420005993\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tristan LASSABLIERE\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MALDANT Johan\", \"beneficiary_dob\": \"23-10-1999\", \"invoice_date\": \"22-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Tristan LASSABLIERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie Adultes\", \"quantity\": null, \"date_of_service\": \"22-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705303_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810002717\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LECLERCQ Maxime\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DEGAVE Sophie\", \"insured_dob\": null, \"beneficiary_name\": \"DEGAVE Sophie\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": \"278086061204939\", \"invoice_issuer\": \"LECLERCQ Maxime\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705380_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"600001333\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CUEILLE ROMAIN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARION Alexandre\", \"beneficiary_dob\": null, \"invoice_date\": \"22-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CUEILLE ROMAIN\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705389_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830001822\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Raynald LAVAYSSIERE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"22-08-2022\", \"security_number\": null, \"invoice_issuer\": \"Raynald LAVAYSSIERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-08-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705426_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110003043\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gladys Ladet\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": 0, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mar\\u00e9chal Stephanie\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Gladys Ladet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705711_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BASSELIN Patrick\", \"beneficiary_dob\": \"30-08-1952\", \"invoice_date\": \"03-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705725_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060006277\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anouk EVENE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ARMENGAUD Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Anouk EVENE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/conseilmobile_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c691b973-9ac9-4fc3-aaf2-d6dde41c98e0_livret_de_famille-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 27045165-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB-azure-construction_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/64aff6629fc4fec77383e720_contrat-de-licence-de-marque_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Etrangers-Vos-demarches_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-LUH_2019-1-1440x2020_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB_LBP_BC-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 27052816-4_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8c0d32a4-a14c-4294-9130-d34cc8fcdffa_Piece_identite__livret_famille-3_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 26773427_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705750_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910003565\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Johann RAYNER\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE MASLE Tiago\", \"beneficiary_dob\": null, \"invoice_date\": \"23-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Johann RAYNER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705767_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BASSELIN Patrick\", \"beneficiary_dob\": \"30-08-1952\", \"invoice_date\": \"03-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705774_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660000464\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas BERTHOME\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MITJANA Stephane\", \"beneficiary_dob\": \"28-11-1979\", \"invoice_date\": \"23-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CENTRE VITAL\\u00c9O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins ost\\u00e9opathiques\", \"quantity\": 1, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705806_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330013400\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas GASPAROTTO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yann ROUVERY\", \"beneficiary_dob\": \"16-03-1982\", \"invoice_date\": \"22-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Nicolas GASPAROTTO (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705904_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060000536\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORANCE Blanche-Ma\\u00eblle\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GARCIA Romain\", \"beneficiary_dob\": \"06-02-2000\", \"invoice_date\": \"04-02-2023\", \"security_number\": null, \"invoice_issuer\": \"FLORANCE Blanche-Ma\\u00eblle\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705924_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300006137\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine CLEMENT\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": 0.0, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DAMIENS Am\\u00e9lie\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"23-08-2023\", \"security_number\": \"2840391027065\", \"invoice_issuer\": \"Marine CLEMENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705926_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"860002005\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"K\\u00e9vin Martinent\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gomes Everdon\", \"beneficiary_dob\": null, \"invoice_date\": \"23-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Centre d'Ost\\u00e9opathie du Blanc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705928_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350008512\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Kerboul Benjamin\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gw\\u00e9na\\u00eblle Tanguy\", \"beneficiary_dob\": \"27-10-1982\", \"invoice_date\": \"19-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Saint-Thurial\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705929_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350000485\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry Buridant\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gwenaelle Tanguy\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Thierry Buridant\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie et de massage r\\u00e9flexe du tissu conjonctif\", \"quantity\": 1, \"date_of_service\": \"21-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705969_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660002163\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aymeric Ferrer\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Herve SCRIBE\", \"beneficiary_dob\": \"27-05-1970\", \"invoice_date\": \"23-08-2023\", \"security_number\": null, \"invoice_issuer\": \"El Aymeric Ferrer Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27705973_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670000660\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"S\\u00e9bastien HAUBER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"JOACHIM Maxime\", \"insured_dob\": null, \"beneficiary_name\": \"JOACHIM Maxime\", \"beneficiary_dob\": null, \"invoice_date\": \"18-08-2023\", \"security_number\": \"1970767582130\", \"invoice_issuer\": \"HAUBER S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27706012_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"390000834\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fran\\u00e7ois VOITEY\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"PERNIN Jean-Marc\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"05-08-2023\", \"security_number\": \"1480239477001\", \"invoice_issuer\": \"Fran\\u00e7ois VOITEY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"05-08-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27706035_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780000246\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"El Samantha GODART\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"D\\u00c9TANTE Eric\", \"insured_dob\": \"21-12-1962\", \"beneficiary_name\": \"D\\u00c9TANTE Eric\", \"beneficiary_dob\": \"21-12-1962\", \"invoice_date\": \"17-08-2023\", \"security_number\": \"1621275114417\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-08-2023\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27706096_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"890000136\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christophe DERBIN\", \"total_billed\": 57.0, \"bill_paid\": true, \"amount_paid\": 57.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ruimard Nickel\", \"beneficiary_dob\": null, \"invoice_date\": \"18-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Christophe DERBIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-08-2023\", \"mandatory_coverage\": null, \"amount\": 57.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27706471_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330007667\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GOIMARD MARC-KEVIN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUBOSQ Cyrille\", \"beneficiary_dob\": null, \"invoice_date\": \"24-08-2023\", \"security_number\": null, \"invoice_issuer\": \"GOMARD MARC-KEVIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27708288_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"667009948\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent CHANET\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Diane ENNUYER\", \"beneficiary_dob\": null, \"invoice_date\": \"23-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Laurent CHANET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Actes d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27708327_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400003463\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PREDOT CHLOE\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FEMENIA LUCIEN\", \"insured_dob\": null, \"beneficiary_name\": \"FEMENIA LUCIEN\", \"beneficiary_dob\": null, \"invoice_date\": \"23-08-2023\", \"security_number\": \"138059935000624\", \"invoice_issuer\": \"PREDOT CHLOE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27708551_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"250003134\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Xavier Hans\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PREVALET Laura\", \"beneficiary_dob\": \"25-01-1999\", \"invoice_date\": \"15-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Xavier Hans - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27708936_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"297023434\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Carval Benjamin\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"TALBODEC ALEXIANE\", \"insured_dob\": null, \"beneficiary_name\": \"TALBODEC ALEXIANE\", \"beneficiary_dob\": null, \"invoice_date\": \"28-03-2023\", \"security_number\": \"203116061212302\", \"invoice_issuer\": \"Benjamin Carval\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie ce jour\", \"quantity\": 1, \"date_of_service\": \"28-03-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27708937_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"290002344\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HAAS Priscilla\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Tallec Sacha\", \"insured_dob\": null, \"beneficiary_name\": \"Tallec Sacha\", \"beneficiary_dob\": null, \"invoice_date\": \"21-04-2023\", \"security_number\": \"1710892035054\", \"invoice_issuer\": \"HAAS PRISCILLA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-04-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27709304_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"720000322\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aurelien Sabouret\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Perrin Camille\", \"beneficiary_dob\": null, \"invoice_date\": \"07-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Karelle CHHIM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27709419_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590010419\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Constance AUDEGOND\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOUKAERT ISABELLE\", \"beneficiary_dob\": null, \"invoice_date\": \"23-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Constance AUDEGOND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION\", \"quantity\": 1, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27710953_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9line LE MAUF-VANNEAU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FORAISON Ys\\u00e9e\", \"beneficiary_dob\": null, \"invoice_date\": \"22-08-2023\", \"security_number\": null, \"invoice_issuer\": \"C\\u00e9line LE MAUF-VANNEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'osteopathie\", \"quantity\": 1, \"date_of_service\": \"22-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27711119_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27711119_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"877000109\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00f4me FOLLET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DAUDON JEROME\", \"insured_dob\": null, \"beneficiary_name\": \"DAUDON JEROME\", \"beneficiary_dob\": null, \"invoice_date\": \"22-08-2023\", \"security_number\": \"172113604605709\", \"invoice_issuer\": \"J\\u00e9r\\u00f4me FOLLET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27711297_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"040000838\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mike DOREAU\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RASTOLL Damien\", \"beneficiary_dob\": null, \"invoice_date\": \"07-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Mike DOREAU - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27713498_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810002410\", \"rpps_number\": \"10002903689\", \"doctor_name\": \"Garcia J\\u00e9r\\u00f4me\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"invoice_date\": \"25-08-2023\", \"invoice_issuer\": \"Docteur J\\u00e9r\\u00f4me GARCIA\", \"beneficiary_name\": \"Peyre Amandine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"amount\": 50, \"quantity\": 1, \"date_of_service\": \"25-08-2023\", \"mandatory_coverage\": null}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27715906_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340011311\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas ANDRE\", \"total_billed\": 70.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAUVY Emma\", \"beneficiary_dob\": null, \"invoice_date\": \"19-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Thomas ANDRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie consultation\", \"quantity\": 1, \"date_of_service\": \"19-08-2023\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB-TESTNHIDEUX-UN-9-4-2025_152202_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/modele-cv-professionnel_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/mobilenew3_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/V1_CV-1_jpeg.original_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e80b73a5-6fc6-4c27-88d1-389ed405e50d_livret_de_famille-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/modele-contrat-de-travail-plein-temps-city-management-lausanne_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 26616051-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/EdiAttAssPermCont_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/exemple-de-facture-e-commerce_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 27052816-3_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27715953_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350003943\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline Rossi\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean-Philippe BENASSIS \", \"beneficiary_dob\": null, \"invoice_date\": \"14-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Pauline Rossi\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27716116_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130012008\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathilde MAZAKA\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BARRIELLE Alain\", \"beneficiary_dob\": null, \"invoice_date\": \"26-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Mathilde MAZAKA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"26-07-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27717154_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Radiologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 52.8, \"bill_paid\": true, \"amount_paid\": 26.34, \"mandatory_coverage\": 26.46, \"complementary_coverage\": null, \"client_part\": 26.34, \"remaining_payment\": 0, \"insured_name\": \"FERRER Julien\", \"insured_dob\": \"02-05-1996\", \"beneficiary_name\": \"FERRER Julien\", \"beneficiary_dob\": \"02-05-1996\", \"invoice_date\": \"17-08-2023\", \"security_number\": \"196057511947622\", \"invoice_issuer\": \"IMAGERIE M\\u00c9DICALE RAMBOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"QZQM001\", \"quantity\": 1, \"date_of_service\": \"17-08-2023\", \"mandatory_coverage\": null, \"amount\": 52.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27717173_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950000125\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Carollne Leloup\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAIGNAN-DERVAUX Juliette\", \"beneficiary_dob\": null, \"invoice_date\": \"07-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Caroline Leloup ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-07-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27718494_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440007441\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emilie FOURRIER\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BLIN L\\u00e9onie\", \"beneficiary_dob\": null, \"invoice_date\": \"28-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE FOURRIER EMILIE EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27093974_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"V\\u00e9ronique Jubault\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Morgane Monserie\", \"beneficiary_dob\": null, \"invoice_date\": \"16-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Veronique JUBAULT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de sophrologie\", \"quantity\": 1, \"date_of_service\": \"16-01-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27125683_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"V\\u00e9ronique JUBAULT\", \"total_billed\": 40.0, \"bill_paid\": true, \"amount_paid\": 40.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Morgane Monserie\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2023\", \"security_number\": null, \"invoice_issuer\": \"V\\u00e9ronique JUBAULT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance 9e Sophrologie\", \"quantity\": 1, \"date_of_service\": \"30-01-2023\", \"mandatory_coverage\": null, \"amount\": 40.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27184004_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300006848\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MORATALLA Gr\\u00e9gory\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LLORENS C\\u00e9cil\", \"beneficiary_dob\": \"21-01-1981\", \"invoice_date\": \"20-02-2023\", \"security_number\": null, \"invoice_issuer\": \"MORATALLA Gr\\u00e9gory\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en cabinet - 20/02/2023\", \"quantity\": 1, \"date_of_service\": \"20-02-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210532_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940002843\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benoit Mousset\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ABDELLI Youssef\", \"beneficiary_dob\": null, \"invoice_date\": \"27-02-2023\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Benoit Mousset\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation au cabinet d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-02-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210534_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950004408\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHASSARA Marie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DASTE Nicolas\", \"insured_dob\": null, \"beneficiary_name\": \"DASTE Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2023\", \"security_number\": \"184051601503145\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Fosses\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"06-02-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210535_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950004408\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHASSARA Marie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DASTE Nicola\", \"insured_dob\": null, \"beneficiary_name\": \"DASTE Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"20-02-2023\", \"security_number\": \"184051601503145\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Fosses\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"20-02-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210547_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440007003\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline Foucaud\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 60.0, \"remaining_payment\": 0, \"insured_name\": \"Billet Brandon\", \"insured_dob\": \"08-06-1992\", \"beneficiary_name\": \"Billet Brandon\", \"beneficiary_dob\": \"08-06-1992\", \"invoice_date\": \"17-02-2023\", \"security_number\": null, \"invoice_issuer\": \"Pauline Foucaud\", \"currency\": \"EUR\", \"items\": [{\"description\": \"une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-02-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210572_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130019631\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LURION Alexia\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MBAE SAID Nadjimou\", \"beneficiary_dob\": null, \"invoice_date\": \"28-02-2023\", \"security_number\": null, \"invoice_issuer\": \"LURION Alexia EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-02-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210576_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"840002323\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandra PETRINI-GERARD\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROMERA Julie\", \"beneficiary_dob\": \"14-07-1995\", \"invoice_date\": \"28-02-2023\", \"security_number\": null, \"invoice_issuer\": \"Sandra PETRINI-GERARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de chiropractie\", \"quantity\": 1, \"date_of_service\": \"28-02-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210601_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"740000591\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fr\\u00e9d\\u00e9ric HALTER\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MONTAUD Remy\", \"beneficiary_dob\": null, \"invoice_date\": \"13-02-2023\", \"security_number\": null, \"invoice_issuer\": \"Fr\\u00e9d\\u00e9ric HALTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-02-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210603_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Di\\u00e9t\\u00e9ticienne\", \"adeli_number\": \"339504813\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audrey CHILLIARD\", \"total_billed\": 57.0, \"bill_paid\": true, \"amount_paid\": 57.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DI PASQUALE SPINELLI Sylviane\", \"insured_dob\": \"11-07-1980\", \"beneficiary_name\": \"DI PASQUALE SPINELLI Sylviane\", \"beneficiary_dob\": \"11-07-1980\", \"invoice_date\": \"28-02-2023\", \"security_number\": \"280076613613388\", \"invoice_issuer\": \"Centre RNPC Villenave d'Ornon\", \"currency\": \"EUR\", \"items\": [{\"description\": \"suivi\", \"quantity\": 1, \"date_of_service\": \"28-02-2023\", \"mandatory_coverage\": null, \"amount\": 57.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27210605_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660000811\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GAGNEUX M\\u00e9lina\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AMPOSTA Ingrid\", \"beneficiary_dob\": null, \"invoice_date\": \"15-02-2023\", \"security_number\": null, \"invoice_issuer\": \"GAGNEUX M\\u00e9lina\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-02-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ab26e47b-d267-439c-9b12-8699c17d5d47_facture_Aurelie_Claude_naturopathe_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ab26e47b-d267-439c-9b12-8699c17d5d47_facture_Aurelie_Claude_naturopathe_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Naturopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine TEZIER\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Aur\\u00e9lie CLAUDE\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Marine TEZIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Bilan naturopathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4c7209da-d83b-416e-b7a2-2d3e409ad598_Schneider_Fred_250213_110050_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Isabelle WINCENT\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SCHNEIDER Fred\", \"beneficiary_dob\": null, \"invoice_date\": \"02-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Isabelle WINCENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"02-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/073de3d4-2823-4413-a2d3-2dfe26c2a928_Osteo_Dutech_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310019567\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lamusse Simon\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"David Dutech\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie de Simon LAMUSSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3bae1b45-0f26-4056-b59c-a04c52679135_recu-fac-2025-1036_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690020029\", \"rpps_number\": \"10010706223\", \"finess_number\": null, \"doctor_name\": \"FRISTIOS Alexe\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PASSAROTTO Andrea\", \"beneficiary_dob\": \"06-06-1993\", \"invoice_date\": \"13-02-2025\", \"security_number\": null, \"invoice_issuer\": \"FRISTIOS Alexe El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation - Adulte Enfant - 13/02/2025\", \"quantity\": 1, \"date_of_service\": \"13-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8e86f61e-284f-4984-84ba-4de5883b0bc8_recu-nh-2025-02-015583_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": \"10010436755\", \"finess_number\": null, \"doctor_name\": \"Etienne DUPONT\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEFEUVRE St\\u00e9phane\", \"beneficiary_dob\": \"18-09-1975\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Etienne DUPONT - Chiropracteur EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Chiropratique\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cd0e973a-a7d9-4202-b5f0-9a1fa979c69d_F-2025-0147_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Francois-Xavier Seytre\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Francois Bieules\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de therapie multidisciplinaire Hypnose sophro EMDR TCC Reflexologie El Cabinet de Therapie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Sophrologie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/88f6c32a-2ae7-476a-b62f-59f8f74c3289_Facture_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690007539\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNARD Nicolas\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LOREAU Maxence\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BERNARD Nicolas E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de M. LOREAU Maxence\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c557951f-c4b7-4e04-a05c-871ace1f18af_F-2025-0065_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9siologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MARIE BERTAUD\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Charly Delaval\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"MARIE BERTAUD EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Premi\\u00e8re s\\u00e9ance kin\\u00e9siologie ENFANT\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1b105dcc-c530-4799-869e-28c15c685fab_Facture_osteo_Balmelle_Cassandra5_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930000146\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. BRISSET Morgan\", \"total_billed\": 130, \"bill_paid\": true, \"amount_paid\": 130, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BALMELLE Cassandra\", \"beneficiary_dob\": null, \"invoice_date\": \"28-01-2025\", \"security_number\": null, \"invoice_issuer\": \"M. BRISSET Morgan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/61f2bafa-3885-4772-a80d-273eb8a572f4_1739442259629972807862712380347_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910000215\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Denis DELBRUN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAUGARD LIONEL\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Denis DELBRUN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/48eb0cb0-6218-4263-a1bf-a75c4b2cfc3d_Aurelie_CLOTAULT_250213003_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010548674\", \"finess_number\": null, \"doctor_name\": \"Aurelie CLOTAULT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Jean- Pierre P\\u00e9ron\", \"beneficiary_dob\": null, \"invoice_date\": \"13-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Aurelie CLOTAULT E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ac6d3ddf-5f2a-4a82-97d9-aabe6be097e3_frais_hypnose_MARIE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"EDIAR Laure\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"BENOIT Marie\", \"insured_dob\": null, \"beneficiary_name\": \"BENOIT Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"2030724037058\", \"invoice_issuer\": \"Hypnose Bergerac\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1d34d985-38b5-4563-9af4-7db1cfba3c6f_FACTURE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"870000817\", \"rpps_number\": \"10010618220\", \"finess_number\": null, \"doctor_name\": \"Charles GOLFIER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROBBE Jean Luc\", \"beneficiary_dob\": \"27-07-1962\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Charles GOLFIER - OSTEOPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3202f28e-41f8-4145-b524-7c9af5903d59_IMG_20250213_100752_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"660002395\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LAURET H\\u00e9l\\u00e9na\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRIMAUD Herv\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"LAURET H\\u00e9l\\u00e9na\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathy session\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e5ab5590-79a1-4611-9c3f-f857aae24847_20250213_104425_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"510000037\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabien MALLET\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ANDRE Carole\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Fabien MALLET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9536b482-958e-4978-9989-3bd87a85ccf6_Ostheo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"217031376\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Rapha\\u00ebl DUPAQUIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOILLEAUX Marie-Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Rapha\\u00ebl DUPAQUIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6c311fa0-a4c5-435f-81c6-7dec91d04978_FACTURE_POTARD_-_BENEDET_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"050000694\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Denis Potard\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BENEDET STEPHANE\", \"beneficiary_dob\": \"30-05-1992\", \"invoice_date\": \"10-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Denis Potard D.C. C .C.S.P.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances de traitement\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/618b4431-8938-47c7-b732-a7cb0642edd5_AP_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mich\\u00e8le VAN DE WALLE\", \"total_billed\": 320, \"bill_paid\": true, \"amount_paid\": 300, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PETIT ANNIE\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet De Gestion du Stress et des Troubles du comportement Mich\\u00e8le VAN DE WALLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE INDIVIDUELLE ACTE DE SOPHROLOGIE\", \"quantity\": 1, \"date_of_service\": \"05-01-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"SEANCE INDIVIDUELLE ACTE DE SOPHROLOGIE\", \"quantity\": 1, \"date_of_service\": \"13-01-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"SEANCE INDIVIDUELLE ACTE DE SOPHROLOGIE\", \"quantity\": 1, \"date_of_service\": \"20-01-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"SEANCE INDIVIDUELLE ACTE DE SOPHROLOGIE\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/75523c17-e9f0-45d4-88e6-c097d4ec6655_2025-02-00037_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"300007648\", \"rpps_number\": \"10010649449\", \"finess_number\": null, \"doctor_name\": \"L\\u00e9a MAURIN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Severine Palacio\", \"beneficiary_dob\": \"21-07-1973\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"L\\u00e9a MAURIN (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dd0180f4-f8ea-462a-b97c-30c55778d31e_IMG_1726_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770001931\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Raymond CIPRIANI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"23-1-2025\", \"security_number\": null, \"invoice_issuer\": \"Raymond CIPRIANI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-1-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5f424e29-cdd7-4c3f-aceb-28377ea12982_Facture_2500023_Ferre_Bryan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"440015410\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maureen GRECO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FERRE Bryan\", \"beneficiary_dob\": \"24-08-2002\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Maureen GRECO E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/93ab4908-62de-462a-8375-ba876cb4b7c0_IMG_4914_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"670006576\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lien DEBREU\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KELLER EAIL\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SARL DEBREU OSTEOPATHE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/da1943b5-80c4-4105-8b48-78f49245b7c4_Facture_patient__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780003984\", \"rpps_number\": \"10010104940\", \"finess_number\": null, \"doctor_name\": \"Gwenaelle JEGU\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Charlotte COUE\", \"beneficiary_dob\": null, \"invoice_date\": \"21-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Centre M\\u00e9dical de la Femme et du Foetus\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2af3fe93-a285-4ced-898b-90b4106bef8c_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"210002416\", \"rpps_number\": \"10010431293\", \"finess_number\": null, \"doctor_name\": \"Anne GUICHARD\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SORAND Adrien\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": \"192032160323671\", \"invoice_issuer\": \"Anne GUICHARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/db570c09-5785-42c2-8de2-18cb7355cc7d_facture_kinesiologue_Alice_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9siologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIGNEUL Virginie\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VOISIN Alice\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"PIGNEUL Virginie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Kin\\u00e9siologie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fd94f8a5-93f4-49dd-a4c4-cf0b4765d704_Facture_Osteopathe_120225_Daoud_DEBBACHE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130021041\", \"rpps_number\": \"10010798758\", \"finess_number\": null, \"doctor_name\": \"Manrique Mallorie\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Debbache Daoud\", \"beneficiary_dob\": \"16-08-1989\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Manrique Mallorie E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'Ost\\u00e9opathie Adulte\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce419ea1-7be7-437d-a30f-ecc621560253_07-02-2025_facture_osteo__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950003582\", \"rpps_number\": \"10010329067\", \"finess_number\": null, \"doctor_name\": \"Marc LAGNEAU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIEIRA Silvio\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"1801059183165\", \"invoice_issuer\": \"Marc LAGNEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eb75a568-3d2e-443d-95bc-ea195f3e2eb6_Screenshot_2025-02-12-21-26-58-619_com.intsig.camscanner_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"020001228\", \"rpps_number\": \"10010470689\", \"finess_number\": null, \"doctor_name\": \"Eden PICCO\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Madeleine STEVENS\", \"beneficiary_dob\": \"26-04-2024\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Eden PICCO (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/27af0979-5d5f-4dee-b6b3-a8ef17cce646_facture_osteopathie_Dufour_Francois_04-02-2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620004218\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laygnez Victorien\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fran\\u00e7ois Dufour\", \"beneficiary_dob\": \"27-12-1973\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Laygnez Victorien E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b6a49fa0-2900-45c1-915c-58d63bf64e48_OSTEO__NASSER_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940003148\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Melhina Tounsi\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nasser BENABBAS\", \"beneficiary_dob\": null, \"invoice_date\": \"29-11-2024\", \"security_number\": \"158049935462238\", \"invoice_issuer\": \"Melhina Tounsi\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-11-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4aaac3ad-97c7-4700-8804-3df05da4763f_IMG_4944_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930001581\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aurore Sacarrere\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HIERRO GUTIERREZ Lucas\", \"beneficiary_dob\": \"26-07-2001\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Center de Pr\\u00e9vention Sant\\u00e9 de Fontainebleau\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/560bfbce-bac5-4d39-be3d-9188bd382fb0_CamScanner_12-02-2025_21.48_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/560bfbce-bac5-4d39-be3d-9188bd382fb0_CamScanner_12-02-2025_21.48_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": \"94107881801\", \"finess_number\": null, \"doctor_name\": \"Marion David\", \"total_billed\": 66.32, \"bill_paid\": true, \"amount_paid\": 26.52, \"mandatory_coverage\": 39.8, \"complementary_coverage\": 0, \"client_part\": 26.52, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"STARON Jessy\", \"beneficiary_dob\": \"15-12-1992\", \"invoice_date\": null, \"security_number\": \"192126938328538\", \"invoice_issuer\": \"Mme MARION DAVID\", \"currency\": \"EUR\", \"items\": [{\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"05-12-2024\", \"mandatory_coverage\": null, \"amount\": 16.58}, {\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"17-12-2024\", \"mandatory_coverage\": null, \"amount\": 16.58}, {\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"19-12-2024\", \"mandatory_coverage\": null, \"amount\": 16.58}, {\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"24-12-2024\", \"mandatory_coverage\": null, \"amount\": 16.58}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}, {\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": \"94107881801\", \"finess_number\": null, \"doctor_name\": \"Marion David\", \"total_billed\": 82.9, \"bill_paid\": true, \"amount_paid\": 82.9, \"mandatory_coverage\": 49.75, \"complementary_coverage\": null, \"client_part\": 33.15, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"STARON Jessy\", \"beneficiary_dob\": \"15-12-1992\", \"invoice_date\": \"19-08-2024\", \"security_number\": \"192126938328538\", \"invoice_issuer\": \"Mme MARION DAVID\", \"currency\": \"EUR\", \"items\": [{\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"07-01-2025\", \"mandatory_coverage\": null, \"amount\": 16.58}, {\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 16.58}, {\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"16-01-2025\", \"mandatory_coverage\": null, \"amount\": 16.58}, {\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 16.58}, {\"description\": \"RAM 7.5\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 16.58}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2ffb844a-d807-42e3-93e8-18ee0285a542_facture-POULAIN_Justine_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930006408\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cacciottolo Sacha\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POULAIN Justine\", \"beneficiary_dob\": \"15-03-1994\", \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cacciottolo Sacha\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1b68fc1f-1e79-4f0b-a9dd-700931da1db1_FACTURE-25-02-542-PLANVIER_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ST\\u00c9PHANE CHETREFF\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 60, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Geoffrey PLANVIER\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"R\\u00c9vel'DYS - ST\\u00c9PHANE CHETREFF - ACCOMPAGNEMENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1er rendez-vous du 11/02/25 pour Kahys 14ans\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ecc107be-fcec-4b98-a0ed-bd325bd27b35_recu-fac-2025-02-031_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340011709\", \"rpps_number\": null, \"finess_number\": \"342700192\", \"doctor_name\": \"Dorian Jourliac\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Julien Corinne\", \"beneficiary_dob\": \"25-11-1966\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Dorian Jourliac\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en Ost\\u00e9opathie - ADULTE (Cabinet)\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0b801a90-3a0a-4b0c-bc7d-56f18b938619_2025-01-23-ophelie-lenoir_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780007753\", \"rpps_number\": \"10010340908\", \"finess_number\": null, \"doctor_name\": \"Jeremie NGUYEN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LENOIR Oph\\u00e9lie\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Jeremie NGUYEN EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e38727c4-1d88-4884-9d8e-0fc021fa21bf_NOTE_DHONORAIRE_-_PAUZANO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130024060\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MENARD Joanna\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PAUZANO Ludovic\", \"beneficiary_dob\": \"21-04-1997\", \"invoice_date\": \"11-01-2025\", \"security_number\": null, \"invoice_issuer\": \"MENARD Joanna\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/054de057-2011-49a3-9d11-3529ac66348d_roux_clemence_facture_2025-02-12_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330014101\", \"rpps_number\": \"10010241502\", \"finess_number\": null, \"doctor_name\": \"MAZERES Chlo\\u00e9\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROUX Cl\\u00e9mence\", \"beneficiary_dob\": \"06-09-1987\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"E.I. MAZERES Chlo\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ed2edb2f-d8f6-4194-8a2e-a42e49d6c317_2025-02-12_18-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"650001597\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dominique SALCUNI\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DOMEC Coline\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Dominique SALCUNI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/de7be139-654f-4d9b-9179-2c08f02e0e93_osteo_noelle_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"250002185\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Brigitte DANZL LEHEC\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Husq No\\u00eblle\", \"beneficiary_dob\": null, \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Brigitte DANZL LEHEC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false, \"blurred_image\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/48534202-89fa-4ee1-ad2c-a3f0b386c454_facture_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"450001235\", \"rpps_number\": \"10010412889\", \"finess_number\": null, \"doctor_name\": \"Dimino Alexis\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"DIMINO Alexis E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/76577d77-1213-4994-9360-9b72f6053892_Document_PDF_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"110000874\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lucie Barthe\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"OLLETA Bernard\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lucie Barthe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d4724213-255b-41ec-aa7c-608661c35856__Facture_Ostheo_Fernandez_Thibault_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"110002250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COUSTAL Antoine\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fernandez Thibault\", \"beneficiary_dob\": \"09-08-1989\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"COUSTAL Antoine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/137e6371-6478-4a36-8383-0090366a49fe_facture_Gros_Barbara_3_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380004853\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Schwebel Emmanuel\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GROS Barbara\", \"beneficiary_dob\": \"31-07-1953\", \"invoice_date\": \"02-12-2024\", \"security_number\": \"253077511248284\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Emmanuel Schwebel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0cae671e-6b7f-41aa-a0ab-d74b8e15e424_DIMART_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"890000748\", \"rpps_number\": \"10005547814\", \"finess_number\": null, \"doctor_name\": \"BOUGHRARA Mohamed\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DIMART JULIEN\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BOUGHRARA Mohamed\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/51d90245-4a54-4a29-a6d5-8d6dac173be7_ostheo_du_12.20.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620001925\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GOLON Sabrina\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEPRAETE Adrien\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": \"192125901736454\", \"invoice_issuer\": \"GOLON Sabrina\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eb32c5c4-3c28-4740-8c3b-687a7c0c64df_IMG_20250212_170008_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620003566\", \"rpps_number\": \"10010156866\", \"finess_number\": null, \"doctor_name\": \"Maxime CARBONI\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alison Ongenae\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Maxime CARBONI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/705c782d-1a9f-4cab-864f-95ab9bdf3180_Facture_2025-02-013_-_S_Martine_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"820000057\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ingrid Dupouy Foultier\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Martine STRUGACZ\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"267088203322113\", \"invoice_issuer\": \"Mme Ingrid Dupouy Foultier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bef5bd2d-a370-457f-9d30-f7ce27c4dd70_Facture_2024-12-002_-_S_Frederic_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"820000057\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ingrid Dupouy Foultier\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fr\\u00e9d\\u00e9ric STRUGACZ\", \"beneficiary_dob\": null, \"invoice_date\": \"03-12-2024\", \"security_number\": \"1670832256086\", \"invoice_issuer\": \"Mme Ingrid Dupouy Foultier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f8c6fe47-9608-4f9e-938b-117bfa4dcd30_ostheopatie_2025-02-12_Grenot-JLuc_Mme-Tabary_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"710000167\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TABARY Nad\\u00e8ge\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRENOT Jean-Luc\", \"beneficiary_dob\": \"03-04-1955\", \"invoice_date\": \"12-02-2025\", \"security_number\": \"155043919821109\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie TABARY Nad\\u00e8ge\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1fb1f78e-5478-4fcd-8549-14d4a6ceef5d_Note_00001E99_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690004122\", \"rpps_number\": \"10010677499\", \"finess_number\": null, \"doctor_name\": \"Alexandre DOMANGE\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Amina TOUATI\", \"beneficiary_dob\": \"09-05-1990\", \"invoice_date\": \"28-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Alexandre DOMANGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ffdfe97d-ff24-4877-a289-51d0dfeab716_Note_00001EAF_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690004122\", \"rpps_number\": \"10010677499\", \"finess_number\": null, \"doctor_name\": \"Alexandre DOMANGE\", \"total_billed\": 76, \"bill_paid\": true, \"amount_paid\": 76, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Amina TOUATI\", \"beneficiary_dob\": \"09-05-1990\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Alexandre DOMANGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 76}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1b80c90d-db06-422b-a4bc-f2911f3cc8b7_Chiropracteur__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"210003075\", \"rpps_number\": \"10010139847\", \"finess_number\": null, \"doctor_name\": \"Oph\\u00e9lie MATHIAS\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GONIN C\\u00f4me\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Centre Chiropratique Dijon\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation pour des soins de chiropraxie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4d0b02fb-a3b2-46ff-a5da-2422e5339e8f_facture_osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770010056\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LEARY Loriane\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLAHAUT Joris\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": \"189115935048169\", \"invoice_issuer\": \"LEARY Loriane\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/902a083e-9309-44ae-9a0d-184fd436bb26_IMG-20250212-WA0016_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"9D0004211\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Guillaume PAYET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CL\\u00c9MENT HOARAU\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SASU SANT\\u00c9O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/110bf0ce-0cc7-4189-ab0e-cb2b8cab1cd9_Ostheo_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130011828\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline Perez\", \"total_billed\": 61, \"bill_paid\": false, \"amount_paid\": 0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ABILLARD BERNARD\", \"beneficiary_dob\": \"16-04-1962\", \"invoice_date\": \"09-12-2024\", \"security_number\": \"162041305500217\", \"invoice_issuer\": \"Caroline Perez\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"09-12-2024\", \"mandatory_coverage\": null, \"amount\": 61}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/421c1905-1ef9-4169-be9b-d3831e233f40_ostheo_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130011828\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline Perez\", \"total_billed\": 61, \"bill_paid\": true, \"amount_paid\": 61, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ABILLARD BERNARD\", \"beneficiary_dob\": \"16-04-1962\", \"invoice_date\": \"29-10-2024\", \"security_number\": \"162041305500217\", \"invoice_issuer\": \"Caroline Perez\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"29-10-2024\", \"mandatory_coverage\": null, \"amount\": 61}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e191a54b-f371-46ae-b65d-0d5bf37f7cd6_Ostheo_3_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130011828\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline Perez\", \"total_billed\": 61, \"bill_paid\": true, \"amount_paid\": 61, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ABILLARD BERNARD\", \"beneficiary_dob\": \"16-04-1962\", \"invoice_date\": \"12-10-2024\", \"security_number\": \"162041305500217\", \"invoice_issuer\": \"Caroline Perez\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"12-10-2024\", \"mandatory_coverage\": null, \"amount\": 61}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ff6211dc-b681-4709-9451-d36fb961476f_MICHELIZZA_QuittanceFSE_1952_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": \"101065933600\", \"finess_number\": null, \"doctor_name\": \"MICHON Clement\", \"total_billed\": 280, \"bill_paid\": true, \"amount_paid\": 280, \"mandatory_coverage\": 119.84, \"complementary_coverage\": 0.0, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MICHELIZZA SYLVIANE\", \"beneficiary_dob\": \"01-02-1952\", \"invoice_date\": \"26-11-2024\", \"security_number\": \"252027510700105\", \"invoice_issuer\": \"MICHON Clement\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 x TER 9,49\", \"quantity\": 1, \"date_of_service\": \"14-10-2024\", \"mandatory_coverage\": 12.58, \"amount\": 31}, {\"description\": \"1 x IFO\", \"quantity\": 1, \"date_of_service\": \"14-10-2024\", \"mandatory_coverage\": 2.4, \"amount\": 4}, {\"description\": \"1 x TER 9,49\", \"quantity\": 1, \"date_of_service\": \"17-10-2024\", \"mandatory_coverage\": 12.58, \"amount\": 31}, {\"description\": \"1 x IFO\", \"quantity\": 1, \"date_of_service\": \"17-10-2024\", \"mandatory_coverage\": 2.4, \"amount\": 4}, {\"description\": \"1 x TER 9,49\", \"quantity\": 1, \"date_of_service\": \"21-10-2024\", \"mandatory_coverage\": 12.58, \"amount\": 31}, {\"description\": \"1 x IFO\", \"quantity\": 1, \"date_of_service\": \"21-10-2024\", \"mandatory_coverage\": 2.4, \"amount\": 4}, {\"description\": \"1 x TER 9,49\", \"quantity\": 1, \"date_of_service\": \"24-10-2024\", \"mandatory_coverage\": 12.58, \"amount\": 31}, {\"description\": \"1 x IFO\", \"quantity\": 1, \"date_of_service\": \"24-10-2024\", \"mandatory_coverage\": 2.4, \"amount\": 4}, {\"description\": \"1 x TER 9,49\", \"quantity\": 1, \"date_of_service\": \"31-10-2024\", \"mandatory_coverage\": 12.58, \"amount\": 31}, {\"description\": \"1 x IFO\", \"quantity\": 1, \"date_of_service\": \"31-10-2024\", \"mandatory_coverage\": 2.4, \"amount\": 4}, {\"description\": \"1 x TER 9,49\", \"quantity\": 1, \"date_of_service\": \"04-11-2024\", \"mandatory_coverage\": 12.58, \"amount\": 31}, {\"description\": \"1 x IFO\", \"quantity\": 1, \"date_of_service\": \"04-11-2024\", \"mandatory_coverage\": 2.4, \"amount\": 4}, {\"description\": \"1 x TER 9,49\", \"quantity\": 1, \"date_of_service\": \"07-11-2024\", \"mandatory_coverage\": 12.58, \"amount\": 31}, {\"description\": \"1 x IFO\", \"quantity\": 1, \"date_of_service\": \"07-11-2024\", \"mandatory_coverage\": 2.4, \"amount\": 4}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fdaa0d42-8e79-4506-b222-373c88891ff5_notehonoraire__20250212_143947_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"810003095\", \"rpps_number\": \"10010648318\", \"finess_number\": null, \"doctor_name\": \"CUSIN L\\u00e9na\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LOU SOULi\\u00e9 BERNET\", \"beneficiary_dob\": \"22-12-2011\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CUSIN L\\u00e9na (El)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Note d'honoraire\", \"quantity\": null, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2f91e647-c8b5-42c5-9a4e-e6dc639bd488_2025-02-00097_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780014098\", \"rpps_number\": \"10010373529\", \"finess_number\": null, \"doctor_name\": \"Julie PASQUALI\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sylvain MILLE\", \"beneficiary_dob\": \"07-09-1986\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Julie PASQUALI (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8f8adcc8-9b8f-48e0-94d5-d8cfb9d84a1f_Scanned_20250212_154615_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10001749646\", \"finess_number\": null, \"doctor_name\": \"Dr OLIVER CHARLOT\", \"total_billed\": 25, \"bill_paid\": true, \"amount_paid\": 25, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DIAS ALVES JULIEN\", \"beneficiary_dob\": \"06-02-1982\", \"invoice_date\": \"12-02-2025\", \"security_number\": \"182027645106607\", \"invoice_issuer\": \"CABINET MEDICAL DE GUEUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte effectu\\u00e9 par un m\\u00e9decin ost\\u00e9opathe agr\\u00e9\", \"quantity\": null, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 25}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e01ef5c0-efc8-4f46-9868-b2b7ea2e85e9_Facture_4_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"740007109\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BRAIZAZ Clara\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HELION Genevieve\", \"beneficiary_dob\": null, \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET PARAMEDICAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de HELION Genevieve\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2b39033c-dfff-4053-8765-682d1b5ee9b2__Facture_Ostheo_Fernandez_Thibault_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"110002250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COUSTAL Antoine\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fernandez Thibault\", \"beneficiary_dob\": \"09-08-1989\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"COUSTAL Antoine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f92ab8b2-8fa7-4813-a7ce-da31ddd10546_Strugala_Marie_facture_2025-02-12_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"370001687\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline POUPET\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": 0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"STRUGALA Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/534b115b-9997-48c3-bc09-ffb89e5bddf3_Screenshot_20250212_150902_com.google.android.apps.docs_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780007951\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe CASTILLE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. RENEAUD Fabien\", \"beneficiary_dob\": null, \"invoice_date\": \"29-07-2024\", \"security_number\": null, \"invoice_issuer\": \"Philippe CASTILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-07-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7e8f4b26-2a8e-40ad-bffa-94a0f7e72e4f_Facture_Osteopathe_12.02.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010580735\", \"finess_number\": null, \"doctor_name\": \"Keith OLIVER\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SALMERON Geaald\", \"beneficiary_dob\": \"07-08-1951\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Keith OLIVER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f80fdd7d-021d-4d76-89f7-51a9b89ea7b5_facturier_2025_Fred_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340000546\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. SOCHELEAU Frederick\", \"total_billed\": 85, \"bill_paid\": true, \"amount_paid\": 85, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUMAS Bernard\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"M. SOCHELEAU Frederick\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 85}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/937cba96-3c4d-4e33-8b76-954fafaf4975_recu-fac-3375_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010645967\", \"finess_number\": null, \"doctor_name\": \"FERRARO Romain\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Capdivila Maurice\", \"beneficiary_dob\": \"19-03-1955\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"FERRARO Romain\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte Ost\\u00e9opathique - 05/02/2025\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8a1bbb2c-3ce9-4064-88ad-6b84b707fd0a_osteo_val_11-02_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"660000084\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PERRET PHILIPPE\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VALERIE CARLES\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"EI PERRET PHILIPPE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ART00000008 -Acte d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a64d7f57-a8ed-44e9-ae38-fae9bb713144_Osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690042320\", \"rpps_number\": \"10010725462\", \"finess_number\": null, \"doctor_name\": \"Perrine MARTINEZ\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Adrien Attard\", \"beneficiary_dob\": \"11-11-1993\", \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Perrine MARTINEZ (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/39f80d3f-06e9-4bf5-9a57-5bbcb27f3321_facture_osteopathe_12.02.25_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"110000221\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BAYSSIERE Hugo\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BIGORDA Marie-Th\\u00e9rese\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": \"2491111069037\", \"invoice_issuer\": \"BAYSSIERE Hugo\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/39f3f732-bcc2-4c98-b2b2-11a8a2431dd6_fact_sophro_corentin_1fev2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire LALOYAUX\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Corentin Six\", \"beneficiary_dob\": null, \"invoice_date\": \"01-02-2025\", \"security_number\": \"1011059606536\", \"invoice_issuer\": \"SOPHRO EQUILIBRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de Sophrologie Cayc\\u00e9dienne\\u00ae\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b7f5ba76-23ab-46eb-819b-17093125fac9_NOYER_Bastian_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910009091\", \"rpps_number\": \"10010366176\", \"finess_number\": null, \"doctor_name\": \"L\\u00e9a GIRARD\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NOYER Bastian\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet ISKIO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie du 05/02/2025\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8dcf02a7-b647-4605-9d95-d3fda3685599_p20250212_14102586_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"570001305\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HOURNAU VALENTINE\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dolores PARTYKA\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"HOURNAU VALENTINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f2cf3af6-98dd-49f8-9d85-533b65215fb3_17393639738994204232790165513253_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940006794\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dyna Bahrami\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEVEZE Eliot\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"MADAME DYNA BAHRAMI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation en ost\\u00e9ophie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/206c2eb0-70e1-421d-9bbb-4bca150f87e7_2025-01-24_Facture_MOREL_Charlotte_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"370001638\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine Roblin\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MOREL Charlotte\", \"beneficiary_dob\": \"20-11-1993\", \"invoice_date\": \"24-01-2025\", \"security_number\": \"293113705035636\", \"invoice_issuer\": \"Marine Roblin - EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8eceb56a-b571-4ab6-a64e-668db6bf4ad5_chiropracteur_fevrier_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"600001028\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent LETORT\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FABRE Amandine\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Laurent LETORT D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins\", \"quantity\": null, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f0653670-51b4-4b9a-a30f-95b466d13bfc_Facture_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750007544\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dimitri DUMONTIER\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BAUTERAU Franck\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": \"168055814502074\", \"invoice_issuer\": \"Dimitri DUMONTIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a2e53cfb-2035-4a8e-8a7b-6b8777ed8168_Facture_Osteo-1_Agathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"9C0000260\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandrine WILLIAM\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ala\\u00efs Agathe\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"287059730421163\", \"invoice_issuer\": \"Sandrine WILLIAM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f96ba91c-5f49-4c19-bcfb-1faa9803cb0c_attestation_chiropracteur_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": \"100103478\", \"finess_number\": null, \"doctor_name\": \"Lucie BAUDRILLART\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RASOLO Marina\", \"beneficiary_dob\": \"25-02-1986\", \"invoice_date\": \"11-02-2025\", \"security_number\": \"286029933307750\", \"invoice_issuer\": \"Centre Chiropratique B & S\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1959c640-25ba-42e8-a898-61d5e0d0eb98_Facture_chiropracteur_07022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"finess_number\": null, \"rpps_number\": \"10010496619\", \"doctor_name\": \"Philippe ALTSCHUH\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"290075746306986\", \"invoice_issuer\": \"Philippe ALTSCHUH\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"beneficiary_name\": \"TAIEB Oriane\", \"beneficiary_dob\": \"06-07-1990\", \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9567846a-09af-4b73-9194-28f51c7bebce_Chevance_Lena_facture_2025-02-11_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"920001666\", \"rpps_number\": \"10010309911\", \"finess_number\": null, \"doctor_name\": \"ALICE BESNARD\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHEVANCE LENA\", \"beneficiary_dob\": \"10-11-1994\", \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"ALICE BESNARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7d98b25c-938e-4d62-9e14-ddc1d119fa45_FACTURE_OSTEO_DU_7_FEVRIER_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350010583\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Erwan PETIT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HEJANDI Salim\", \"beneficiary_dob\": \"21-08-1987\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d140e5d2-ccaf-4ef3-acbd-d623e555b39f_attestation_chiropracteur_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": \"100103478\", \"finess_number\": null, \"doctor_name\": \"Lucie BAUDRILLART\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RASOLO Marina\", \"beneficiary_dob\": \"25-02-1986\", \"invoice_date\": \"11-02-2025\", \"security_number\": \"286029933307750\", \"invoice_issuer\": \"Centre Chiropratique B & S\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f0dd1267-5e7b-4fa9-89f6-9534aeadb602_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"650001712\", \"rpps_number\": \"10010234689\", \"finess_number\": null, \"doctor_name\": \"Yoann SARBARIE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUCLOS Serge\", \"beneficiary_dob\": \"27-03-1965\", \"invoice_date\": \"12-02-2025\", \"security_number\": \"165036528618369\", \"invoice_issuer\": \"Yoann SARBARIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/047b2e29-3030-4b30-b328-8726874f3506_251_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350004222\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien Balmadier\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Charpentier jean Pierrick\", \"beneficiary_dob\": \"15-06-1987\", \"invoice_date\": \"17-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Julien Balmadier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c0ec89f6-4641-443f-bcc4-261e980b4d31_Numerisation_20250212_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940012446\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SIMONIN Lucas\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHARPOT Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SIMONIN Lucas\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dc89371d-1648-4674-ba86-57579de74299_2500352-ozturk-kaan-90009253167ab6c990cbd9_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770001220\", \"rpps_number\": \"10010309424\", \"finess_number\": null, \"doctor_name\": \"SABINE CALORI\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"kaan OZTURK\", \"beneficiary_dob\": \"18-08-2024\", \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SABINE CALORI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e25dbadc-3492-4c64-939b-c24d030a129c_FACT_OSTHEO_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830001145\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Franck COUTIERE\", \"total_billed\": 180, \"bill_paid\": false, \"amount_paid\": 0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Eveline Hamman\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Franck COUTIERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"21-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6e61ced2-f624-4434-b980-a347976b9612_osteopathe_11.02.2025_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"540002664\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Andr\\u00e9a-Lisa IOPPI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HUMBERT Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Andr\\u00e9a-Lisa IOPPI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c6a3cd29-b1c5-4058-ad04-0bf2f7f6b001_Facture_osteopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"2100001566\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eddy RATEAU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"ALSAYA Darlene\", \"insured_dob\": null, \"beneficiary_name\": \"ALSAYA Darlene\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"292072105425843\", \"invoice_issuer\": \"Eddy RATEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a0bbcbed-7ccd-4e43-9e14-296e186afcce_FACTURE_Osteo_AdlaNadia_11022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340013366\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE FLOC'H PIERRE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Adla Nadia\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"LE FLOC'H PIERRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5c913d9d-15e9-4949-8122-14ae9836529c_IMG_20250212_092314_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Soins hospitalier\", \"adeli_number\": \"440029338\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MIDON Agnes\", \"total_billed\": 35, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 35, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"02-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CLINIQUE JULES VERNE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUP\", \"quantity\": 1, \"date_of_service\": \"02-01-2025\", \"mandatory_coverage\": null, \"amount\": 35}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f4d29dad-6a97-40db-b0e4-372f1eac3f47_facture_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750034480\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexandre FIOR\", \"total_billed\": 70, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sandrine CATHERINE\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2024\", \"security_number\": null, \"invoice_issuer\": \"Alexandre FIOR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/060cd8e5-ac6f-47e5-b8f5-fe9d3920c522_CHIRO_FEVRIER_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": \"10010554243\", \"finess_number\": null, \"doctor_name\": \"Anthony REGAZZONI\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"invoice_date\": null, \"invoice_issuer\": \"EI Anthony REGAZZONI\", \"beneficiary_name\": \"ROUARD Jacques\", \"security_number\": \"152049935059492\", \"beneficiary_dob\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/95c188de-0800-4cdc-8f02-4c6fdadf3d66_Facture_Osteo_Annick_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750007643\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jos\\u00e9phine Lyon\", \"total_billed\": 90, \"bill_paid\": true, \"amount_paid\": 90, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Amnick PRADELLE\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"261097507700389\", \"invoice_issuer\": \"Jos\\u00e9phine Lyon\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/74cc9bae-c0f3-4d35-b5d9-11c90715dc71_facture_osteopathe_10_fev_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330007386\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christophe A\\u00efscar\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Caren LANGLOIS\", \"beneficiary_dob\": \"30-05-1978\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Christophe A\\u00efscar\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8ca649d2-4e40-498f-aad8-a6315305c0a4_Note_honoraire_Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"820000826\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Galmiche Christophe\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOUJU Andr\\u00e9e\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": \"2430624322044\", \"invoice_issuer\": \"Galmiche Christophe - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2a9fa60a-de63-46e4-bed1-e9889e103efa_document_sans_titre_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"100060669\", \"finess_number\": null, \"doctor_name\": \"Thepot Thierry\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"invoice_date\": \"10-02-2025\", \"invoice_issuer\": \"Thepot Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"date_of_service\": \"10-02-2025\", \"amount\": 65, \"quantity\": 1, \"mandatory_coverage\": null}], \"beneficiary_name\": \"DUPIRE Virginie\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3f1263ee-f89f-4fff-a5c4-ca0175f771b0_seance_sophrologie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jennyfer KRIEFF\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laetitia COHEN\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Jennyfer KRIEFF\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de sophrologie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1d51560b-8320-4267-85b8-0473d1e608a3_Facture-2025-2-143-osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310013636\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline Plassard\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Stephane DELPLACE\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mme Pauline Plassard\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation adulte\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/891dad93-044c-45aa-a39a-75fef063675c_janv_2025_-_ostheo_-_Dr_ALVERGNE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"210002861\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florian ALVERGNE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie Florent\", \"beneficiary_dob\": null, \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"El Florian ALVERGNE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/62f9b609-ae07-47b7-9e9f-5b28143887e3_IMG_2284_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910006774\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"IMPERATO Jennifer\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JONI Jimmy\", \"beneficiary_dob\": \"19-08-2022\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"IMPERATO Jennifer\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cccf5a90-7d2c-46e5-a1a9-4ec676a6964c_IMG_3533_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010362613\", \"finess_number\": null, \"doctor_name\": \"Mathilde Froment\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PEREZ Flavia\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"El Mme. Mathilde Froment\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/27be9780-1ff1-467a-9a69-129a2692553a_IMG20250210204321_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"667011654\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bruno Azema\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Carine Malecamp\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": \"286036604901236\", \"invoice_issuer\": \"Monsieur Bruno Azema\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/284cb22c-17ad-409b-8d5f-79e43a6dd51b_Facture__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910009935\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lucas Margonty\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laura Julien\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lucas Margonty\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/36ca9b00-c0bb-4e2c-90da-585b083db352_IMG_9238_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380007278\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sophie FOURNIER\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KRUPKA Anthony\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"196063854408131\", \"invoice_issuer\": \"Mme FOURNIER Sophie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7bc6ff74-13df-435e-8457-ecbb20d9d366_Demande_remboursement_mutuelle_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": 560000168, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SELIRL S\\u00c9BERT\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUIHO Anne Sophie\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SELIRL S\\u00c9BERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Consultation ost\\u00e9o\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3b7fafb5-b197-437e-bf3d-84dd5ea73d88_Kinesio_Mathieu_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9siologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9line Abader\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"IBOS CAILLOT Mathieu\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"C\\u00e9line Abader\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de Kin\\u00e9siologie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0a28cce2-34d3-4fa0-8d46-7ff07a0b96c6_Numerisation_20250211_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690011838\", \"rpps_number\": \"10010171238\", \"finess_number\": null, \"doctor_name\": \"M\\u00e9lanie ROBERT\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RILLARD Emma\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"M\\u00e9lanie ROBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6f92aef5-73c1-404e-aabe-29f40cffd8b9_facture-20250203003_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690022041\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gladys Morand\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KERSUZAN Romain\", \"beneficiary_dob\": \"12-10-1995\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Gladys Morand Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f1f52e8d-326c-4714-ac75-c9ade1d945ca_facture_osteopathe_tosi_christine_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130003759\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SERRES Aurore\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TOSI Christine\", \"beneficiary_dob\": \"06-12-1969\", \"invoice_date\": \"03-02-2025\", \"security_number\": \"269121315523214\", \"invoice_issuer\": \"Mme SERRES Aurore\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f4e2083e-3640-4988-853e-83579e9fed87_Adobe_Scan_11_fevr._2025_1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lucle ALBISser\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DIETERICH NORGANE\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lucle ALBISser\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d06e4662-7a58-42aa-8845-486391ebd183_20250211_191611_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"290006048\", \"rpps_number\": \"10010563103\", \"finess_number\": null, \"doctor_name\": \"SOLLIEC No\\u00e9mie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MOUTINHO Aude\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"278059134517158\", \"invoice_issuer\": \"Mme SOLLIEC No\\u00e9mie El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/594dfde5-cd12-4ea4-a472-a2fc8b91fb1a_OsteoNERONDE_Yasmina_facture_2025-02-11_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780013371\", \"rpps_number\": \"10010141611\", \"finess_number\": null, \"doctor_name\": \"Julie COULOMBEL\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NERONDE Yasmine\", \"beneficiary_dob\": \"19-07-1995\", \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mme Julie COULOMBEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e3a47913-2535-4417-a34f-fb8213cd8174_recu_osteopathe_du_11-02-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"440005478\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LUCILE PICHON\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VEDI Olivier\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"170053523804711\", \"invoice_issuer\": \"LUCILE PICHON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/042baaa0-ebb0-46e5-a848-b01901b58d3f_Lebrun_Y._31.1.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750028227\", \"rpps_number\": \"10010352572\", \"finess_number\": null, \"doctor_name\": \"Romain Mouradian\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEBRUN Yolande\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Romain Mouradian\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OST\\u00c9OPATHIE\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4d8a7c77-2ba7-4071-93a5-a842ff9abfc3_6733_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910003789\", \"rpps_number\": \"10010330123\", \"finess_number\": null, \"doctor_name\": \"Baptiste Maudet\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Barbara Jawor\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Baptiste Maudet Ost\\u00e9opathe Biom\\u00e9canicien.DO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie du jour\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/698ea3aa-944c-41f0-89c2-46a2be3a9189_facture_577852_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690026877\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"El Agathe BRULLIARD\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marion Bastien\", \"beneficiary_dob\": \"04-04-1992\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"P\\u00f4le Sant\\u00e9 Mus\\u00e9um F\\u00e9lix Faure\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/83838f2d-27bc-4bf3-8c22-03ea88618774_facture_Hergott_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"730001888\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Charl\\u00e8ne AUBRETON\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HERGOTT Candice\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": \"165077511441736\", \"invoice_issuer\": \"Charl\\u00e8ne AUBRETON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/398cd878-0581-437d-9390-d464e3c47fbe_Facture_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"520000787\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maud SCARABELLO\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BAUDOT V\\u00e9ronique\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"2660752121010\", \"invoice_issuer\": \"CABINET D'OSTEOPATHIE Maud SCARABELLO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/882e2ad5-e1f1-44fc-b935-08a1c37f52d1_ostheo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"880001847\", \"rpps_number\": \"10010510419\", \"finess_number\": null, \"doctor_name\": \"Lilian REMY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vincent CIRELLI\", \"beneficiary_dob\": \"16-11-1975\", \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lilian REMY (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/95fe82b0-b4b4-4de7-9ec5-62b4aefaf62f_Facture_Ostheopathe_2025-02-10_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310005202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Meunier Camille\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jolivet Kevin\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Meunier Camille\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0ca9563f-d46d-4f22-8f28-a19eca15ae40_Scan0015_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"500001185\", \"rpps_number\": \"1001042447\", \"finess_number\": null, \"doctor_name\": \"Magali GIROT\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ALIFONSO Frances\", \"beneficiary_dob\": \"04-07-1951\", \"invoice_date\": \"11-02-2025\", \"security_number\": \"251079913227514\", \"invoice_issuer\": \"MG Cabinet D'ost\\u00e9opathie GIROT PACAULT GOYET JOUENNE COUBAILLON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/137586df-491e-440d-97b1-0afd0047318f_Facture_2025-02-008_-_L_Ezra_5_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychomotricit\\u00e9\", \"adeli_number\": \"929613230\", \"rpps_number\": \"10006913817\", \"finess_number\": null, \"doctor_name\": \"Lara EL KHAYAT\", \"total_billed\": 180, \"bill_paid\": true, \"amount_paid\": 180, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ezra LATOUR\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lara EL KHAYAT - Psychomotricienne D.E\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychomotricit\\u00e9\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ance de psychomotricit\\u00e9\", \"quantity\": 1, \"date_of_service\": \"17-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ance de psychomotricit\\u00e9\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/41e00ab7-34ce-4ebd-baae-b7b9e42fb973_17392896793882777076946436275276_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"020001160\", \"rpps_number\": \"10010468865\", \"finess_number\": null, \"doctor_name\": \"Micka\\u00ebl Silva\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CLAUSSE Amaury\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Micka\\u00ebl Silva\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte ost\\u00e9opathique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/77c1b3c3-234d-4431-8675-a5c1d12053e0_osteopathe_MOREAU_Coralie_10-02-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010304839\", \"finess_number\": null, \"doctor_name\": \"Nathalie BECU\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MOREAU Coralie\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"E.I. Nathalie BECU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/68e26d63-8bae-4e80-8f45-9a0cf6c23ad1_Osteo__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940012925\", \"rpps_number\": \"10010141033\", \"finess_number\": null, \"doctor_name\": \"LESTIENNE Anne-Laure\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AMOUYAL Pierre Alexandre\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"LESTIENNE Anne-Laure E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/47a3ed6d-aade-4da2-8006-1ec798c5e915_Stiegler_Gary_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690042965\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jade RONDI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"STIEGLER Gary\", \"beneficiary_dob\": \"31-08-1998\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE Rondi Jade\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a4873987-de63-47b0-b0ef-81044554305f_IMG_20250211_163051_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620002204\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maazoun Karim\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Pocher Soizic\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mr MAAZOUN KARIM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f28cb1fa-b824-4d30-8acf-590ed71f56a3_FA_25-1656_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VERGNET Val\\u00e9rie\", \"total_billed\": 25, \"bill_paid\": true, \"amount_paid\": 25, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Varilles Christian\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"VERGNET Val\\u00e9rie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"R\\u00e9flexologie soins de support\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1866f267-d6a3-49c5-80c3-e1c5d5ee6e72_HAEFFNER_Esther_Honoraires_osteeo-Schweickart_du_070225_Montant_55_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010494549\", \"finess_number\": null, \"doctor_name\": \"Philippe SCHWEICKART\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HAEFFNER ESTHER\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"263126822426518\", \"invoice_issuer\": \"Philippe SCHWEICKART\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/67d85b25-b91b-4e2e-b699-6a2c26043269_Screenshot_2025-02-11-15-26-13-544_com.mi.globalbrowser_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"710000084\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phanie Durantet\", \"total_billed\": 68, \"bill_paid\": true, \"amount_paid\": 68, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SCREMENTI St\\u00e9phanie\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"276117127008920\", \"invoice_issuer\": \"St\\u00e9phanie Durantet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 68}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/09b49af6-99c2-4325-b48a-d4f54de049b1_FACTURE_OSTEOPATHE_11_FEVRIER_2025_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"450002159\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MATTHIEU GABEZ SOULAS\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ATIN Godai\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mr MATTHIEU GABEZ SOULAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f539946f-af74-4bd2-92d6-36c8b867b21e_FACTURE_21.01.2025_JEAN_OSTEO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"660000167\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Heurtel Jean-Yves\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean FERRER\", \"beneficiary_dob\": null, \"invoice_date\": \"24-01-2025\", \"security_number\": \"1520266136133\", \"invoice_issuer\": \"Heurtel Jean-Yves\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1caeda03-aca9-43f5-8f05-8cd0ed3db1f2_20250211_134942_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"380004168\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"K\\u00e9vin RODIER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TRELLU Fabien\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Chiropratique Rodier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/03deb470-7e1b-4ef6-9c03-5e2c4f9aa83d_facture_2023-20-13_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychomotricit\\u00e9\", \"adeli_number\": null, \"rpps_number\": \"10007632705003\", \"finess_number\": null, \"doctor_name\": \"SOUILLAC Christine\", \"total_billed\": 135, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"CHANTREIN\", \"insured_dob\": null, \"beneficiary_name\": \"ELEONORE\", \"beneficiary_dob\": \"13-05-2019\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mme SOUILLAC Christine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de r\\u00e9ducation psychomotrice du 9.01.2025\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 45}, {\"description\": \"S\\u00e9ances de r\\u00e9ducation psychomotrice du 23.01.2025\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 45}, {\"description\": \"S\\u00e9ances de r\\u00e9ducation psychomotrice du 30.01.2025\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e952e0c6-7be2-422c-bafa-36773a842f74_PXL_20250211_123900568.MP_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330004995\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9mence Marty\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAMIE Dimitri\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9mence Marty EI Ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b2d1c0ae-7f30-4730-82e0-39361ceba000_Facture_du_08.02.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Luc Marchesani\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SERRANO Liam\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Jean-Luc Marchesani\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Sophrologie Enfant Adolescent\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e3e00a63-05a0-4ad9-bd6e-144f1dde039b_Facture_du_14.01.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Luc Marchesani\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SERRANO Liam\", \"beneficiary_dob\": null, \"invoice_date\": \"14-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Jean-Luc Marchesani\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Sophrologie Enfant Adolescent\", \"quantity\": 1, \"date_of_service\": \"11-01-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c32642fc-23ad-4dd1-a1e3-5d5fd5d5c84d_Facture_Evelyne_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sage-femme\", \"adeli_number\": \"275000495\", \"rpps_number\": \"10005024723\", \"finess_number\": null, \"doctor_name\": \"Brasseur Guerin Juliette\", \"total_billed\": 28.5, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Evelyne MOREL\", \"beneficiary_dob\": \"19-06-1972\", \"invoice_date\": \"18-01-2025\", \"security_number\": null, \"invoice_issuer\": \"JULIETTE BRASSEUR GUERIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'acupuncture\", \"quantity\": 1, \"date_of_service\": \"18-01-2025\", \"mandatory_coverage\": null, \"amount\": 28.5}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/34fa78e7-f55a-41eb-911b-3ce4a3339641_Facture_osteopathie_05.02.2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"870000197\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florent MILLE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LARANT B\\u00e9atrice\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"263068718713072\", \"invoice_issuer\": \"Florent MILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"honoraires\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dcfae795-497c-4d49-8e5d-1c059f6d5cb4_Facture_Osteopathe__2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940011901\", \"rpps_number\": \"10010141090\", \"finess_number\": null, \"doctor_name\": \"Julie BAUDAIS\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BEDRIGNANS Katarina\", \"beneficiary_dob\": \"23-01-1998\", \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Julie BAUDAIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1d3b5d22-87f2-4c89-90e5-8b0350bca84e_facture__Surre_Chloe_03_fevrier_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"119303137\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SAUVAL Karine\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Surre Chlo\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SAUVAL KARINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation pour un accompagnement psychologique\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b10610ca-bc5d-4954-bbc7-f6ab6e640a2c_Facture_250210_211735_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"317033017\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"IZARD PIERRE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DOLEAC MARIE\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET CAILHABAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de DOLEAC MARIE\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/39b0a511-5694-4690-b409-82a999013f77_note_honoraire_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Johan MABIRE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cynthia Bonnet\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Johan MABIRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"19-12-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/36571d26-dd70-4596-bcaa-3b77c37e0c38_FACTURE_ETIOPATE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Johan MABIRE\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cynthia Bonnet\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Johan MABIRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/955425c5-5b0e-4cc7-a53c-60a146885a5d_SLPT25021111580_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"117704098\", \"doctor_name\": \"J\\u00e9rome GAMBIER\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MERCIECA Fr\\u00e9d\\u00e9ric\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": \"174063018907649\", \"invoice_issuer\": \"J\\u00e9rome GAMBIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/147ba58b-baee-46eb-aad0-2738f0714b94_NH_ARNAULT_Manon_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"370000028\", \"rpps_number\": \"10010409406\", \"finess_number\": null, \"doctor_name\": \"Elsa FOULON\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ARMAULT Manon\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"284103705013897\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5029a8d0-77b5-4543-b5c8-fc1307d78ea1_20250211_120758_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"440005353\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florence COUTINEAU\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Brillant Vincent\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"1800344109799\", \"invoice_issuer\": \"Florence COUTINEAU D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soin\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/13c5a371-db61-439c-b4cc-f081619f36d3_Facture_A4_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770001287\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benjamin LIEUR\\u00c9\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DENOY Bertrand\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Benjamin LIEUR\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4e6947e0-c34b-4fe1-81ab-41fce98ebbed_25_02_10_Facture_Hypnotherapeute_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marilyn ROUDIL\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"D'Ardaillon Marie-Annick\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": \"258108715438153\", \"invoice_issuer\": \"El Marilyn ROUDIL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/72009fc3-e88d-4cbe-b482-7f1f354c17d3_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"137006912\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ababacar SENE\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAMY Dany\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"KINE SPORT SANTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ced5da07-0910-4525-a0b9-f1f931fde4d1_2025-02-06-laurent-leclerc_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010312410\", \"finess_number\": null, \"doctor_name\": \"yann JOFFRE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LECLERC Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e41a3905-f595-4a23-8b98-cfba5c39052f_OSTEO_du_11_02_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"240001123\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Estrade Julien\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SERRA david\", \"beneficiary_dob\": \"22-01-1975\", \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Estrade Julien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/688f2ea6-27c8-4ee1-9f03-351f2992eb18_osteo_2_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350000790\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DELAHAIE-BARLAND Sophie\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean-Fran\\u00e7ois L'HELGOUALC'H\", \"beneficiary_dob\": \"02-03-1955\", \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Sophie DELAHAIE-BARLAND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/43efc0af-d04d-42a1-b638-89c32aac42d9_osteo_1_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350000790\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DELHAIE-BARLAND Sophie\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean-Fran\\u00e7ois L'HELGOUALCH\", \"beneficiary_dob\": \"02-03-1955\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"DELHAIE-BARLAND Sophie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c9234c29-9d5a-4f8f-bd45-d33c38649f73_20250211_111008_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130009939\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marion THUY\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ragno Jean Michel\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE CEDRIC DELMAS & MARION THUY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fe0454ba-1a2e-453f-961e-8cdd811688ac_2500024-salamo-ludovic-2845113036792709974b3d_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690042353\", \"rpps_number\": \"10010725587\", \"finess_number\": null, \"doctor_name\": \"Catherine Lesort\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ludovic SALAMO\", \"beneficiary_dob\": \"31-07-1998\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Catherine Lesort\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/45be169f-4e14-4143-adf3-451a0a273f80_2025-02-00037_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"020001939\", \"rpps_number\": \"10010476397\", \"finess_number\": null, \"doctor_name\": \"Alice ROG\\u00c9\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Agn\\u00e8s BAUWE-COLLETTE\", \"beneficiary_dob\": \"19-06-1996\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Alice ROG\\u00c9 (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/92c99802-6675-4f28-aacd-cc314526adbd_IMG-20250106-WA0001_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060007317\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Boussemart C\\u00e9lia\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lacombe Vincent\", \"beneficiary_dob\": null, \"invoice_date\": \"09-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Boussemart C\\u00e9lia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-12-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/da6997f8-6c41-4ff8-bb10-b58e8c63012f_FACTURE_OSTEO_11-02-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590010898\", \"rpps_number\": \"10010467131\", \"finess_number\": null, \"doctor_name\": \"Mada SAROUT\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. KOEHL Patrice\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mme Mada SAROUT E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/407243a9-16c8-4107-b8b3-5c9ad81c6952_DONNA_Celia_feuille-de-soins_2025-02-11_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940009152\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jordan COUDERC\", \"total_billed\": 270, \"bill_paid\": true, \"amount_paid\": 270, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DONNA C\\u00e9lia\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie \\u00ab La Pointe du lac \\u00bb\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathie adulte\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}, {\"description\": \"Consultation ost\\u00e9opathie p\\u00e9diatrique\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}, {\"description\": \"Consultation \\u00e0 domicile 94\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"Consultation \\u00e0 domicile 75\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f8f29a91-6c43-4768-b4ad-ebd1c1cedf5b_17392679302675416955239120322151_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060007317\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Boussemart C\\u00e9lia\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lacombe Vincent\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Boussemart C\\u00e9lia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ef7f5147-cbdc-435a-beb8-488018b358cc_facture_osteopathe_2025-02-11_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anna DEMANUEL\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"J'DAMOTTE-URSELLA Jean - sieue\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Anna DEMANUEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation \\u00e9tiopathique\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d40eaef8-860d-43f7-9156-3e2dcd09e498_Screenshot_20250211_104714_cn_wps_moffice_eng_PDFReader2_edit_2442102462482098_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexa Gauthier\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VARAIN Cassandre\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Fais Ta Bulle - Alexa Gauthier El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Sophrologie - s\\u00e9ance initiation \\u00e0 domicile\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/70257c2f-ec35-4ac9-9b57-e62a8013bb0d_facture_osteo_cassandre_08022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940002959\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BAOUGH Mehdi\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cassandra le LAN\", \"beneficiary_dob\": \"03-05-2005\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BAOUGH Mehdi\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e5090ee5-7b15-4056-9459-c12d5757b516_1739267158433_CamScanner_11-02-2025_10.45_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"160001616\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ambre JAVELAUD\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LARIVE Yan\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"183089301002463\", \"invoice_issuer\": \"Ambre JAVELAUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b53e8f7a-d934-4e3f-8d03-12e071fdcfd6_Chiro_10_Fevrier_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": \"10010554243\", \"finess_number\": null, \"doctor_name\": \"Anthony REGAZZONI\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"invoice_date\": \"10-02-2025\", \"invoice_issuer\": \"EI Anthony REGAZZONI\", \"beneficiary_name\": \"LE GALL Roger\", \"insured_dob\": null, \"beneficiary_dob\": null, \"security_number\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"soins de chiropraxie\", \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"quantity\": 1, \"amount\": 40}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/13194c98-bb10-4227-a438-b7a1b9f2b6cb_facture_osteo_caro_08022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940002959\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BAOUGH Mehdi\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Huet Caroline\", \"beneficiary_dob\": \"10-04-1970\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BAOUGH Mehdi\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ee47fa65-6a09-493b-af5e-3d8faf643436_2500016-duvivier-anne-marie-173187397567aa1b97b01b5_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830008546\", \"rpps_number\": \"10005440911\", \"finess_number\": null, \"doctor_name\": \"Catherine Mauz\\u00e9\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Anne-marie DUVIVIER\", \"beneficiary_dob\": \"16-02-1957\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Catherine Mauz\\u00e9 (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b14402b2-1846-4abb-b17e-31a538802ae8_Facture_Osteopathe_10022025__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830011722\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VABOIS Emilie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GARCIA Yves\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"VABOIS Emilie Ost\\u00e9opathe D.O - Praticienne Rebouteuse\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/72eca803-8b6c-462c-83b3-ffbccf565a09_invoice-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340010073\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GENELOT Catherine\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AGGERY Martine\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"GENELOT Catherine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7bb792ad-c082-4a57-a642-6b2f2e247de3_2024-12-20_Osteopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940010341\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MEULNOTTE S\\u00e9bastien\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Johann GUYOT\", \"beneficiary_dob\": null, \"invoice_date\": \"20-12-2024\", \"security_number\": null, \"invoice_issuer\": \"MEULNOTTE S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/899e9e10-fb96-44c3-9208-342b88a4cb73_20250210_202914_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"336018544\", \"rpps_number\": \"10010626926\", \"finess_number\": null, \"doctor_name\": \"Aurore DUBOUILH\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Eva Corino\", \"beneficiary_dob\": \"17-07-1975\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Aurore DUBOUILH\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OSTEOPATHIE AU TARIF DE\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2c07c6c0-9ab6-4578-8a88-704ea038047e_Screenshot_20250211_101627_M365_Copilot_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurie Alet\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jennifer Lalaut\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Laurie Alet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de sophrologie individuelle\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f7120556-e394-4832-baa8-fcb3b68a1ae9_IMG_20250210_225639_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310002498\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cyril CHIAROVANO\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TRIXEION Virginie\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Cyril CHIAROVANO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fe09519f-259b-46c0-9b45-00972e0a148c_Scan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"247006125\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sabine GONDONNEAU-BEELEN\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAURY Jean\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Sabine GONDONNEAU-BEELEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c64ad8f0-0d5a-4a5b-aec5-62ec44099520_Screenshot_20250211_100040_Gallery_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330002056\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maryse PICARD\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEPONT Pauline\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Maryse PICARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8f8bf175-244e-4b43-a2e2-b507703ad1bf_IMG_1176_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"9D0000706\", \"rpps_number\": \"10010478583\", \"finess_number\": null, \"doctor_name\": \"Brian LARDY\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CLAIN Dany\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Brian Lardy El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e358b13d-402c-4e6e-9a65-1160a204353e_Screenshot_20250211_100027_Gallery_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330002056\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maryse PICARD\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUPONT Floriau\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Maryse PICARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2a9dd772-6d23-44ae-95f7-5de05ffd1167_fact_podologue_7-2-25_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940000904\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Su\\u00e9li PEREIRA\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIRAPIN Viviane\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Su\\u00e9li PEREIRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s)\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cd3050c7-d380-4ecc-9665-c7209b6035f9_2025-02-07-malik-chaouche_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060005410\", \"rpps_number\": \"10010196078\", \"finess_number\": null, \"doctor_name\": \"guillaume HERVY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHAOUCHE Malik\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE guillaume hervy El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cc0c0b33-1a94-469b-bd88-ca534e257d33_Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950001057\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LELOUP Bastien\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bunguy Alain\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"LELOUP Bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0508d173-adb4-4fdd-9ab6-af5d22c7312f_facture_osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010419433\", \"finess_number\": null, \"doctor_name\": \"PAJON Jean - Baptiste\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Guilla Soize\", \"beneficiary_dob\": null, \"invoice_date\": \"29-o1-2025\", \"security_number\": null, \"invoice_issuer\": \"PAJON Jean - Baptiste\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9ophathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/27c16ad0-3309-4eb1-9432-ab8a546fee7c_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"160002143\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GRELLETY MARGOT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Camille HILT\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"208014428833612\", \"invoice_issuer\": \"GRELLETY MARGOT\", \"currency\": null, \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2f3c1f06-a109-4236-944c-c6343d80bbe3_IMG_20250211_0001_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"717009302\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BASTIEN Vincent\", \"total_billed\": 64, \"bill_paid\": true, \"amount_paid\": 64, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHIROL PASCAL\", \"beneficiary_dob\": \"01-04-1961\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BASTIEN Vincent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OSTEO\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 64}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/35e4813a-ec5d-4d93-af4a-537d673c6ce5_20250211_083659_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"010006146\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Achille CAYRAT\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POURCHAIRE ALEXIA\", \"beneficiary_dob\": \"15-10-1995\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Achille CAYRAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/88a54205-4f31-459b-981a-99dadbeaad02_IMG_4210_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940000599\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CYNTHIA RAINE\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Arthur PUENTE\", \"beneficiary_dob\": \"10-09-2023\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"CYNTHIA RAINE EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/00ac2a76-52ec-437a-8477-a573d62ecbe4_Livret_de_Famille_2024_1-6_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/BILLET-DE-LIVRAISON_IMPRIME_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2a14fc39-7d4c-4cde-be4e-012439c34bc6_rib_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2a14fc39-7d4c-4cde-be4e-012439c34bc6_rib_1_scale_1.5_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 27045165-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB-bforbank-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Acte_de_mariage_de_Joachim_Murat_et_de_Marie_Annonciade_Bonaparte-Archives-nationales-AE-I-11-12-12-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d2704df4-4066-449e-adde-b7433b38c718_Facture_69_Lucile_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010634466\", \"fitness_number\": null, \"doctor_name\": \"BARRIOS TH\\u00c9O\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bertin Lucile\", \"beneficiary_dob\": null, \"invoice_date\": \"01-02-2025\", \"security_number\": \"208097851733633\", \"invoice_issuer\": \"BARRIOS TH\\u00c9O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c20881d7-861d-44dd-b69e-a3f5ed4768ea_Osteopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"870001886\", \"rpps_number\": \"100110413697\", \"finess_number\": null, \"doctor_name\": \"Charlotte Rebiffe\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Annabelle DA GLORIA\", \"beneficiary_dob\": \"14-10-1976\", \"invoice_date\": null, \"security_number\": \"276103726112094\", \"invoice_issuer\": \"Charlotte Rebiffe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/91d588d0-aa0a-438c-91c2-06bbe858810c_n96_DUBOIS_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590011748\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BLANCKAERT Karel\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUBOIS Delphine\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"BLANCKAERT Karel E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/70e53730-c41c-402b-a227-2262cdd9fa7b_osteo_Laurette_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010439080\", \"finess_number\": null, \"doctor_name\": \"Maxime FRERET\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SADOC Laurette\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Maxime FRERET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Note d'honoraire\", \"quantity\": null, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a7524bf0-1810-47ff-96c1-e744cbb863be_RDV_04.02.25FA-7564-000052_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marion CHAMASSIAN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ferouz AZOUAOU\", \"beneficiary_dob\": \"28-02-1978\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"El Marion CHAMASSIAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance adulte sophrologie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/417d826b-2733-4565-93a1-358377c55c56_Facture_Justin_renaud_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060014776\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Hourcastagnou Jacques\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Justin Renaud\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BELHARRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8c8c6ff8-8e9c-49fc-b131-1dce75bcdfc2_Facture_FA1947_04_02_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Micka\\u00ebl Vadepied\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dufaud Marc\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"H\\u00f4pital priv\\u00e9 du Confluent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Th\\u00e9rapies Br\\u00e8ves\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 58.33}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/221e6223-efc6-44e8-9ef1-c53d5893abf9_Screenshot_20250210-175253_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310004437\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SEGUIN Olivier\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAZENEUVE David\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SEGUIN Olivier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bd507a78-b07b-430a-a24b-a38a224e9fd5_CamScanner_10-02-2025_17.59_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"430000646\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MORIN Marie-Anne\", \"total_billed\": 46, \"bill_paid\": true, \"amount_paid\": 46, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAPON Lauryne\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"2061042095129\", \"invoice_issuer\": \"MORIN Marie-Anne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance(s) de soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 46}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a8c2a548-70cf-4980-8263-45f4e465dc34_ciano_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060006178\", \"rpps_number\": \"10010781903\", \"finess_number\": null, \"doctor_name\": \"PICART REMI\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CIANO Alexandre\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie PICART REMI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1dd2837d-2682-44df-b033-38358b6ea2d5_recu-fac-2025-3165_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"280000282\", \"rpps_number\": \"10010273943\", \"finess_number\": null, \"doctor_name\": \"Anne-Charlotte DUBIEF\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LACAZETTE Romy\", \"beneficiary_dob\": \"11-02-2012\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d' Anne-Charlotte DUBIEF\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Adulte d\\u00e9j\\u00e1 venu au cabinet - Consultation du 10/02/2025\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/254612ad-aaea-43d0-8af7-882709c7e1d4_2025-02-00047_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690026778\", \"rpps_number\": \"10010721776\", \"finess_number\": null, \"doctor_name\": \"Robinson JACQUIN-RAVOT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sarah BECK-FUCHS\", \"beneficiary_dob\": \"08-05-1994\", \"invoice_date\": \"10-02-2025\", \"security_number\": \"294055012931425\", \"invoice_issuer\": \"Robinson JACQUIN-RAVOT (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/01a27c1e-afbe-428f-9261-a9a82d1b2efb_IMG_20250210_171911_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"730003167\", \"rpps_number\": \"10010699188\", \"finess_number\": null, \"doctor_name\": \"BABOULAZ Jean-Victor\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Malait\\u00e9 Yohann\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": \"186089407504750\", \"invoice_issuer\": \"BABOULAZ Jean-Victor\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/06a0b166-a2e2-4ed3-9b4f-9f3f7668badd_FACTURE_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060002672\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NICOLAS EISEN\", \"total_billed\": 70, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marline Labade\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"NICOLAS EISEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2e51aed5-3a16-4b97-9e27-e9eddc4414c9_2025-02-00087_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620005025\", \"rpps_number\": \"10010472388\", \"finess_number\": null, \"doctor_name\": \"Quentin TAILLIEZ\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Noa NAYET\", \"beneficiary_dob\": \"21-05-2005\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Quentin TAILLIEZ (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/71c65de3-e97a-4323-8783-3879d60fcfef_osteo_janvier2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"680003365\", \"rpps_number\": \"10010506565\", \"finess_number\": null, \"doctor_name\": \"Pauline KIMMEL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nicolas POIROT\", \"beneficiary_dob\": \"20-07-1987\", \"invoice_date\": \"09-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Pauline KIMMEL Ost\\u00e9opathe D.O E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5dec7b4c-0160-443c-a498-6e94875222aa_FEVRIER_HYPNO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Virginie Doison\", \"total_billed\": 90, \"bill_paid\": true, \"amount_paid\": 90, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emilie Kerreira\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Virginie Doison\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance Hypnose\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 72}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1819370a-a6dd-4954-b333-09f81aaa1e40_FACTURE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lambat Aline\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ponelle Fran\\u00e7oise\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lambat Aline\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de kin\\u00e9sith\\u00e9ologie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/82df68b1-0478-4694-9566-3826dfeb4e5e_25.02.10_LE_LAY_Olivier_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350008413\", \"rpps_number\": \"10010561875\", \"finess_number\": null, \"doctor_name\": \"FEREY Cassandre El\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivier LE LAY\", \"beneficiary_dob\": \"25-12-1971\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet R\\u00e9\\u00e9ducation Jeanne d'Arc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5e06ef95-6731-4416-a626-67039c3593dd_facture_osteo_chris_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"297017964\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jos\\u00e9 Bidet\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CRAS Christelle\", \"beneficiary_dob\": null, \"invoice_date\": \"05-11-2024\", \"security_number\": \"2851129232127\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-11-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0e3176f2-ef50-42cd-a0aa-a683a7c76b0b_Facture_Osteo_du_06-02-2025_Mme_CAPAROS_Margaux__E_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310001245\", \"rpps_number\": \"10010616224\", \"finess_number\": null, \"doctor_name\": \"Swan PEDRAS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAPAROS Margaux\", \"beneficiary_dob\": null, \"invoice_date\": \"25-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Swan PEDRAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e5bd6416-b3f3-43aa-8d03-d3480bcccdec_20250210_154543_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130001597\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Samuel FONTANILLE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Louis-Philippe\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": \"1810407019025\", \"invoice_issuer\": \"Samuel FONTANILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/644c17e0-cced-46f8-87e7-231c6ac0f5b5_ostheo_04.02.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"300005824\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHALLUT Jonathan\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Romera Julie\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CHALLUT Jonathan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9e0ec80c-7190-4c77-a4f8-3dda603b0b30_Facture_Osteo_-_1ERE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750029852\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"David REDRADO\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"YACONO David\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"David REDRADO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/be55c852-24ff-46db-8667-78addd0b6470_OSTEO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010522018\", \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00f4me Jacquet\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mansuy Karl\", \"beneficiary_dob\": \"30-06-1973\", \"invoice_date\": \"03-02-2025\", \"security_number\": \"17306491007150854\", \"invoice_issuer\": \"E.I. J\\u00e9r\\u00f4me Jacquet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4dc7d3f6-a977-4ef6-9dc7-d8f51fe502c1_07_02-Pinelli_Marine__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"920003910\", \"rpps_number\": \"10010321163\", \"finess_number\": null, \"doctor_name\": \"Charlotte Fran\\u00e7ois\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Pinelli Marine\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Charlotte Fran\\u00e7ois E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b4437c5d-5410-4461-8004-7d6a8ff3fbbc_OSTEOPATHE_10022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330000118\", \"rpps_number\": \"10010580289\", \"finess_number\": null, \"doctor_name\": \"Vincent NORMAND\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Limousi Nicolen\", \"beneficiary_dob\": \"07-05-1971\", \"invoice_date\": \"10-02-2025\", \"security_number\": \"171053311902321\", \"invoice_issuer\": \"Vincent NORMAND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/11a7e90a-3116-4b7a-9b8d-e6949ef0cc00_Adobe_Scan_10_fevr._2025_1_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/11a7e90a-3116-4b7a-9b8d-e6949ef0cc00_Adobe_Scan_10_fevr._2025_1_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590010104\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Betty CHOMBEAU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Virginie Duyvesne\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"270065959908603\", \"invoice_issuer\": \"Betty CHOMBEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cbbb7087-3f07-4e2c-ba7e-412f813d75aa_Facture_Mr_Cerisier_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"490004033\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"K\\u00e9vin COLIN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gilbert Cerisier\", \"beneficiary_dob\": null, \"invoice_date\": \"20-12-2025\", \"security_number\": null, \"invoice_issuer\": \"COLIN K\\u00c9VIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"20-12-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/781ad55b-d6c6-4c04-81ee-2a7791df4d02_Facture_BOLZERJean_Francois_20250207GMAM_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": \"10010487618\", \"finess_number\": null, \"doctor_name\": \"Alexis CORTINA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M BOLZER Jean Fran\\u00e7ois\", \"beneficiary_dob\": \"30-08-1972\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de chiropraxie Cortina\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi Chiropratique\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cc6f476b-07ef-4342-8d4d-3a490ab0d1c9_2025-02-09_22-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590009965\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gauthier PAYEN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Th Bleuvray Benjamin\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Gauthier PAYEN Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ab1215bd-b251-4071-85aa-e24ef9dfd006_2024-12-31-valentin-petit-blanc_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010376969\", \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lie DIM\\u00c9GLIO\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PETIT BLANC Valentin\", \"beneficiary_dob\": null, \"invoice_date\": \"31-12-2024\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Dim\\u00e9glio Aur\\u00e9lie El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"31-12-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0453c161-70af-4474-8854-27ab0f5d7bc0_2025-02-00044-2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"850005430\", \"rpps_number\": \"10010219714\", \"finess_number\": null, \"doctor_name\": \"Etienne MURESAN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sara Castel\", \"beneficiary_dob\": \"02-09-1968\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Muresan Etienne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4172bea6-7272-4ac4-b934-c61f449e4e21_Facture_Osteopathie_Valerie_CALIOT_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330012485\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lavenu Margaux\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CALIOT Val\\u00e9rie\", \"beneficiary_dob\": \"23-07-1964\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lavenu Margaux Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce6ed73c-50e6-4684-9d91-26eba210ba43_Feuille_de_soins_Ostheo_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce6ed73c-50e6-4684-9d91-26eba210ba43_Feuille_de_soins_Ostheo_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130023658\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas Tecles\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bernado Geoffrey\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Thomas Tecles\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie au cabinet ou domicile\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130023658\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas Tecles\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bernado Geoffrey\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Thomas Tecles\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie au cabinet ou domicile\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/afefdce4-68ba-4cbe-8680-06584acd2c84_ANTOINE_nicole_courrier-dadressage_2025-02-06_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"290001163\", \"rpps_number\": \"10002677697\", \"finess_number\": null, \"doctor_name\": \"PHILIP Arnaud\", \"total_billed\": 81.09, \"bill_paid\": true, \"amount_paid\": 81.09, \"mandatory_coverage\": 49.67, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Nicole ANTOINE\", \"beneficiary_dob\": \"25-07-1956\", \"invoice_date\": \"06-02-2025\", \"security_number\": \"256072901928790\", \"invoice_issuer\": \"CABINET DE MEDECINE MANUELLE - OSTEOPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": 49.67, \"amount\": 81.09}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/079beb8a-e6c0-4d3b-9495-706d7586913a_2025-01-23-nelly-raynaud_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"10010650546\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline Chomarat\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RAYNAUD Nelly\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Caroline Chomarat El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/79d28636-5bc4-41f1-8cd9-6a2bc63f10ed_facture_osteo_04.02.2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130014582\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry DERVAUX\", \"total_billed\": 67, \"bill_paid\": true, \"amount_paid\": 67, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rodriguez Helene\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Thierry DERVAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 67}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/20d4adc8-b0e1-4080-9b33-f997914100b5_facture_osteo_31.01.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"630004588\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ANTUNES Geoffrey\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BAGGIONI Jean-Cristophe\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"ANTUNES Geoffrey\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/52bf793f-cea7-4827-a719-ac4fae130280_facture_osteo14.01_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130014582\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry DERVAUX\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rodriguez Pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"14-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Thierry DERVAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5b11f47d-80de-4eb6-854c-4fe8bd4e4903_facture_osteo_30.01.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130014582\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry DERVAUX\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rodriguez Pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Thierry DERVAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cf49f64f-639a-4c76-9121-6e5ca6796fdf_Etiopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Etiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9dric LEMAITRE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUERIN PHILIPPE\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": \"166105041013863\", \"invoice_issuer\": \"C\\u00e9dric LEMAITRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"intervention(s) en \\u00e9tiopathie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b4977ff1-87fc-4c52-86ee-05c2d328a2b7_IMG_6991_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"2A0001160\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lie RICHARD\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Pieraggi Gilles\", \"beneficiary_dob\": null, \"invoice_date\": \"15-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Aur\\u00e9lie RICHARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d3aea37d-3c4d-49b1-bbd6-75fdea788fc9_IMG_3831_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330002205\", \"rpps_number\": \"10010583028\", \"finess_number\": null, \"doctor_name\": \"Jean-Yves KREITER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ARDANA Sandy\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Jean-Yves KREITER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2368a8d3-147e-4d8f-b183-889e65f9b30f_Adobe_Scan_10_fevr._2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590010104\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Betty CHOMBEAU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Betty CHOMBEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4cd799f2-230c-45c9-9753-b5424d9fd390_Screenshot_2025-02-10-10-27-16-926_fr.doctolib.www_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340008515\", \"rpps_number\": \"10005696363\", \"finess_number\": null, \"doctor_name\": \"J\\u00e9rome Brechet\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PLASSART Sabrina\", \"beneficiary_dob\": \"09-05-1981\", \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins prest\\u00e9s ce jour\", \"quantity\": null, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce827d60-2f85-4ecb-9722-edb8090e9b75_soins_05022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jeannine VOLPE-SINCLAIR\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DANIEN KAUVE\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"246901243000\", \"invoice_issuer\": \"Jeannine Volpe-Sinclair\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) th\\u00e9rapie manuelle\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2374006b-e4c4-464a-9305-5bf364c1de59_facture_osteo_30_01_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690025101\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florine LUNION\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lapez Rafail\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Florine LUNION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/27e2e977-a08c-4c84-9b4e-9c8d4662d774_soins_31012025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jeannine VOLPE-SINCLAIR\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DANIEN Karine\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": \"246901243000\", \"invoice_issuer\": \"Jeannine Volpe-Sinclair\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) th\\u00e9rapie manuelle\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b0dda121-46d1-4f2e-ab19-4c125f47f724_image_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340006683\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yamna BOUTAYEBI-TAHOR\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ARBOUX LIONEL\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Yamna BOUTAYEBI-TAHOR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/05ec43f1-34ea-40e6-b799-413abcba56a5_IMG_1539_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"420002354\", \"rpps_number\": \"10010207461\", \"finess_number\": null, \"doctor_name\": \"POIZAT Perrine\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marine BELLON\", \"beneficiary_dob\": \"20-07-1993\", \"invoice_date\": null, \"security_number\": \"293073403221258\", \"invoice_issuer\": \"Cabinet POIZAT Perrine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4b4bf0a1-472b-40b2-871f-0a0b74371d60_facture_osteopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380003517\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERTRAND Bastien\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NUCKA Aude\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"288113842106882\", \"invoice_issuer\": \"BERTRAND Bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/66374f1a-2b53-4106-b2cc-f56005caba70_Kinesiologie_Fevrier_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9nerg\\u00e9tie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PASCAL CLAIRE EI\", \"total_billed\": 110, \"bill_paid\": true, \"amount_paid\": 110, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CALAS MARC\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Clair\\u00c9nerg\\u00e9tique - PASCAL CLAIRE EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soin \\u00e9nerg\\u00e9tique en cabinet 2 heures\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 110}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bce5f438-e8f9-4089-b6c9-c3906bbc095f_Facture_Osteopathe_Karine_ANTONY_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310012299\", \"rpps_number\": \"10010616034\", \"finess_number\": null, \"doctor_name\": \"Maxime BENAZET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ANTONY KANRINE\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/50b0e3a7-5052-4fa1-b2ca-d2d90911b9c9_Facture_osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"570001412\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HUGUES Laura\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KORUTOS Axel\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Hugues\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de KORUTOS Axel le 06/02/2025\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/75911aee-b045-4d1f-b1a7-2104dcc220c0_Facture_KACHNICNoory_20250204EIIO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"750009581\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"de BOURBON Philippe\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M KACHNIC Noory\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SARL Philippe de Bourbon\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte Chiropratique\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7b6705b4-d7c8-4db2-93e2-3ca596ef641c_2025-01-00082_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"420005803\", \"rpps_number\": \"10010714383\", \"finess_number\": null, \"doctor_name\": \"Lisa TURCO\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sandrine CORNILLON\", \"beneficiary_dob\": \"28-08-1975\", \"invoice_date\": \"18-01-2025\", \"security_number\": null, \"invoice_issuer\": \"E.i lisa TURCO (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fe3ade48-89c0-4ac3-81f3-6dae15e20859_Ostheopate_07_02_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130014103\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CLAD Sylvain\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RODRIGUEZ Thierry\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CLAD Sylvain\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4cee9980-7485-4c1b-8bc9-ef56e2bb294c_facture_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750032732\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OBLED Clara\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BUQUOI Clotilde\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE PARIS 11\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3860a65d-b7fe-45ed-942d-5832de69430e_Facture_osteo_15-01-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010338159\", \"finess_number\": null, \"doctor_name\": \"LEGUY Elsa\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"15-01-2025\", \"security_number\": null, \"invoice_issuer\": \"LEGUY Elsa\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8682e432-4b4f-4152-a1d6-0e5f52eee148_Osteo_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750013179\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emma Gauthier\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bucca Clotilde\", \"beneficiary_dob\": null, \"invoice_date\": \"13-05-2024\", \"security_number\": null, \"invoice_issuer\": \"Emma Gauthier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-05-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fbe11d63-7753-4ad8-ac65-3f84d35a79bb_Facture_osteopathe_du_3.02.2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"850000647\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BILLAUD DAVID\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gautier Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BILLAUD DAVID\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b4c1bab8-2970-4bdd-9f35-7322e2232357_Facture_000655_LASERBLOIS_Nicolas_DUBOUT_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sevrage tabagique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 190, \"bill_paid\": true, \"amount_paid\": 190, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nicolas DUBOUT\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SAS LASERBLOIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Sevrage tabagique Coatching stop tabac\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 190}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/717d0d8d-3065-41d4-99d9-8c42dbcab6a8_Viani_Doris_facture_2025-02-01_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010146230\", \"finess_number\": null, \"doctor_name\": \"Tatiana KOJIC\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIANI Doris\", \"beneficiary_dob\": null, \"invoice_date\": \"01-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Tatiana KOJIC (El)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-02-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b28a8ae6-8d73-49d9-831f-f38ced7ea3a8_IMG_20250209_202807_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"920000940\", \"rpps_number\": \"10010304391\", \"finess_number\": null, \"doctor_name\": \"Rodolphe BOHEC\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CARMIGNAC CL\\u00c9O\", \"beneficiary_dob\": null, \"invoice_date\": \"13-11-2024\", \"security_number\": \"288129404105310\", \"invoice_issuer\": \"Rodolphe BOHEC (E.I.)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-11-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/141d1472-51b6-49f1-a349-e2b5a2adbdde_Facture_osteo__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"880002159\", \"rpps_number\": \"10010512746\", \"finess_number\": null, \"doctor_name\": \"LANDLER-HURY Charlotte\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chadel Amandine\", \"beneficiary_dob\": \"27-11-1988\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"LANDLER-HURY Charlotte EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation adulte d'ost\\u00e9opathie - 08/02/2025\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ffcc4998-24bd-4317-b3c9-e1c59eab2dd6_facture_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"S\\u00e9bastien CHENEVEZ\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELAIN CLARA\", \"beneficiary_dob\": null, \"invoice_date\": \"01-02-2025\", \"security_number\": null, \"invoice_issuer\": \"S\\u00e9bastien CHENEVEZ EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention \\u00e9tiopathique\", \"quantity\": 1, \"date_of_service\": \"01-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/58b272d5-4396-4a57-8bf4-270353909f08_facture_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"S\\u00e9bastien CHENEVEZ\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELAIN CLARA\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"S\\u00e9bastien CHENEVEZ EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eac29b5a-2fa8-47f5-b827-bd1923e05786_24051707_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780005815\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Clara MOINE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PEZERIL Michel\", \"beneficiary_dob\": \"09-07-1979\", \"invoice_date\": \"17-05-2024\", \"security_number\": \"179079206303329\", \"invoice_issuer\": \"Clara MOINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-05-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d2a8cfa5-32ee-4bf7-a77c-8d48625be243_2025-02-00061_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780018750\", \"rpps_number\": \"10010383775\", \"finess_number\": null, \"doctor_name\": \"L\\u00e9a PELOSO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Michel Pezeril\", \"beneficiary_dob\": \"09-07-1979\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"L\\u00e9a PELOSO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/934ce856-861d-44d3-94b2-998eb1a3d763_2025-02-00049_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780018750\", \"rpps_number\": \"10010383775\", \"finess_number\": null, \"doctor_name\": \"L\\u00e9a PELOSO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Audrey Pezeril\", \"beneficiary_dob\": \"04-01-1990\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"L\\u00e9a PELOSO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c8b24d6c-cee9-4798-9f6d-2de22f0b4abc_facture-2025020600000007_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010340676\", \"finess_number\": null, \"doctor_name\": \"Laurent KROUM\", \"total_billed\": 90, \"bill_paid\": true, \"amount_paid\": 90, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIEUTOR Thibault\", \"beneficiary_dob\": \"25-05-1993\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"IMSS x SFP - Laurent KROUM Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7cec0f10-5643-45db-a0e7-96d14f8a243f_osteo-0924-coste_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"630000271\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pascale VERNA\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COSTE Daniele\", \"beneficiary_dob\": null, \"invoice_date\": \"23-09-2024\", \"security_number\": null, \"invoice_issuer\": \"Pascale VERNA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s)\", \"quantity\": 1, \"date_of_service\": \"23-09-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6c77d51c-20ae-4759-84dd-20a0aa65409e_osteo-1224-coste_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000271\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pascale VERNA\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COSTE Daniele\", \"beneficiary_dob\": null, \"invoice_date\": \"17-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Pascale VERNA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s)\", \"quantity\": 1, \"date_of_service\": \"17-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2db47c1d-6dc7-4095-92b2-16a3f6bcfb24_facture_osteopathe_01-02-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"540002193\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Baptiste GARNIER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lydie MARCHOIS\", \"beneficiary_dob\": \"20-08-1965\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Baptiste GARNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"01-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4d169269-e51f-48c7-b2c0-260961caa491_2025-02-00071_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750028243\", \"rpps_number\": \"10010352754\", \"finess_number\": null, \"doctor_name\": \"Marina MARTINHO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Hugo LAPLEAU\", \"beneficiary_dob\": \"15-11-2000\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Marina MARTINHO (El)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fc14b93e-73ae-44e1-82b1-0ca50ee622de_Facture_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"710003419\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mansuy Antoine\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chantal PERRON\", \"beneficiary_dob\": \"16-06-1949\", \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Espace sant\\u00e9 Boucicaut\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/62d5741a-33da-4740-a17b-ecc7c8d35158_Facture_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"710003419\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Antoine Mansuy\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chantal PERRON\", \"beneficiary_dob\": \"16-06-1949\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Espace sant\\u00e9 Boucicaut\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5cf26866-c1ae-4dfd-8793-54ef5148224c_FACTURE_OSTEO_SILLON_DU_22-11-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"600002729\", \"rpps_number\": \"10010400207\", \"finess_number\": null, \"doctor_name\": \"Laura KOPANIA\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SYMPHON Murielle\", \"beneficiary_dob\": null, \"invoice_date\": \"22-11-2024\", \"security_number\": \"2731297209341\", \"invoice_issuer\": \"LAURA KOPANIA El.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"NOTE D'HONORAIRES ACQUITTEE\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/50553ad6-c299-42f0-855e-ffdfd6c6113e_Facture_Osteopathie_17-12-24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950000158\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabienne BERS\\u00c9GOL\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chebabatte Tarik\", \"beneficiary_dob\": null, \"invoice_date\": \"17-12-2024\", \"security_number\": \"189079519905215\", \"invoice_issuer\": \"Fabienne BERS\\u00c9GOL E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/41624052-35d9-4cf9-b3b3-14ce33f31fcc_FACTURE_OSTE_MURIELLE_SILLON_DU_29-11-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"600002729\", \"rpps_number\": \"10010400207\", \"finess_number\": null, \"doctor_name\": \"Laura KOPANIA\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SYMPHON Murielle\", \"beneficiary_dob\": null, \"invoice_date\": \"29-11-2024\", \"security_number\": \"2731297209341\", \"invoice_issuer\": \"LAURA KOPANIA El. OSTEOPATHE D.O.F\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f1a13712-01f0-4907-b3f5-e2306d4fd44b_Facture_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"147012777\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"L'HIRONDEL Wilfrid\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"INIESTA St\\u00e9phanie\", \"beneficiary_dob\": null, \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"L'hirondel wilfrid\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de INIESTA St\\u00e9phanie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c2b83174-b925-4b53-a364-4f90864eef93_pdfOlivier_B_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330000639\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Guillaume LEGRAND\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivier BONABAL\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Guillaume LEGRAND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/64423bd1-67f9-49e4-8192-c9e6fae257fc_1739110695856573011383863310512_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"560002743\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Launay Adrien D.C\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Morice ANGELIQUE\", \"beneficiary_dob\": null, \"invoice_date\": \"28-01-2025\", \"security_number\": \"290092227817310\", \"invoice_issuer\": \"Launay Adrien D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9dcfc4d4-73ce-4ba3-a120-eb356b3536e6_Facture_Osteopathe_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"760003905\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Guillaume VAYSSIER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PHILIPPE Arnaud\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"177097654029188\", \"invoice_issuer\": \"Guillaume VAYSSIER OST\\u00c9OPATHE D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/511ba153-1eae-4682-ade0-01217ff522bf_Facture_consultation_130125_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690002910\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RUBEN PUGNET\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JADE HUGUET\", \"beneficiary_dob\": null, \"invoice_date\": \"13-01-2025\", \"security_number\": null, \"invoice_issuer\": \"RUBEN PUGNET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"NOTE D'HONORAIRE EN OSTEOPATHIE\", \"quantity\": null, \"date_of_service\": \"13-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b013c377-130f-4bb8-a0ec-410e5e43b7f0_Seance_Hypnose_070225_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maurinier Patricia\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mans Melanie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Hypno Terre Happy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'hypnose\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/65c129d6-b875-4168-913c-84b40ecc4162_1_ere_facture_osteo_jerome_TITE__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330025818\", \"rpps_number\": \"10010276995\", \"finess_number\": null, \"doctor_name\": \"Bruce CLEOPHAS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jerome TITE\", \"beneficiary_dob\": \"21-03-1978\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Bruce CLEOPHAS (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f041f191-6b8f-429a-b8d9-1204957390b2_JANNEAU_DAVID_JANV25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Etiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandra BARRET BOSSET\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JANNEAU DAVID\", \"beneficiary_dob\": \"08-12-1980\", \"invoice_date\": \"23-01-2025\", \"security_number\": \"180127115303163\", \"invoice_issuer\": \"Sandra BARRET BOSSET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention Etiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/64fa4c19-efa4-48f1-acf8-8c2e8f573b8b_Facture_osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"600003651\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9ment PAPAUX\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ESTEVE J\\u00e9r\\u00f4me\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9ment PAPAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b0977b46-747a-418c-bd20-9e99472917a2_Screenshot_20250209-143006_Samsung_Notes_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"920001450\", \"rpps_number\": \"10010095312\", \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00f4me SEBBAN\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Millet Marina\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"J\\u00e9r\\u00f4me SEBBAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/38b89b50-a6b6-4816-a495-ce2d9e71082f_Kine-28-11-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"447003633\", \"rpps_number\": \"10101273968\", \"finess_number\": null, \"doctor_name\": \"Emmanuelle de BUYER\", \"total_billed\": 20, \"bill_paid\": true, \"amount_paid\": 20, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUVAL Paul\", \"beneficiary_dob\": null, \"invoice_date\": \"28-11-2024\", \"security_number\": null, \"invoice_issuer\": \"Emmanuelle de BUYER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins Hors Nomenclature (HN)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3f4d05df-aa5e-4d73-8601-cab801b4eb0b_AMADIS_Marie_Facture_osteo_14nov_2024_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIOUD Benjamin\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AMADIS Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"14-11-2024\", \"security_number\": null, \"invoice_issuer\": \"PIOUD Benjamin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/835cfd39-4ac3-499f-882a-89956b32970c_IMG_5007_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330013681\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHLO\\u00c9 BUYSSENS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Margaux DUPLAND\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CHLO\\u00c9 BUYSSENS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e1c02a95-aff0-4809-b30e-101ad81213b3_osteo_st1_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780002051\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Virginie PIEL\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LABARRE St\\u00e9phane\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Virginie PIEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/df1aed59-e90c-44b3-b554-9486d68418da_IMG_4636_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380000836\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gr\\u00e9gory FAGOT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BONHOMME Margot\", \"beneficiary_dob\": null, \"invoice_date\": \"14-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Gr\\u00e9gory FAGOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f1ea3eb8-6ad6-4268-ae30-6903e2555fb3_Facture_FA-1544-001097_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9siologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tiffanie LAURENT\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Stephanie GUENEAU\", \"beneficiary_dob\": \"20-04-1989\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"El Tiffanie LAURENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Kin\\u00e9sioologie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c233ac94-6806-4b40-868f-c0e5b7ccda8f_2025-02-00012_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010378718\", \"finess_number\": null, \"doctor_name\": \"Axel SEDOUD\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nino Guadagna\", \"beneficiary_dob\": \"12-09-1988\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Axel SEDOUD (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/abd25cab-e2d5-456c-ac6c-d209b698daf2_Le_Monnier_Fauche_Nolan_facture_2025-01-31_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950000430\", \"rpps_number\": \"10006093487\", \"finess_number\": null, \"doctor_name\": \"Franck MAURICE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE MONNIER FAUCHE NOLAN\", \"beneficiary_dob\": null, \"invoice_date\": \"29-11-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Ost\\u00e9o St-Loupien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/12ebca6a-2e94-4739-a692-1100f6722a5f_Le_Monnier_Fauche_Nolan_facture_2025-01-17_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950000430\", \"rpps_number\": \"10006093487\", \"finess_number\": null, \"doctor_name\": \"Franck MAURICE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE MONNIER FAUCHE NOLAN\", \"beneficiary_dob\": null, \"invoice_date\": \"17-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Ost\\u00e9o St-Loupien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0d84600c-c8f4-4727-8df4-809364681e25_Facture_osteopathie_Compain_Frederic_0125_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690023510\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GARCIA Alexane\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COMPAIN Fr\\u00e9d\\u00e9ric\", \"beneficiary_dob\": \"28-02-1971\", \"invoice_date\": \"28-01-2025\", \"security_number\": null, \"invoice_issuer\": \"GARCIA Alexane\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/51ca4ba2-5d66-4fee-a12e-64c135b684ba_IMG_20250209_094156_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"700000524\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mijanou PATHIER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"METZELARD MARECHAL Marie - Christine\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mijanou PATHIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f58f9a38-60a6-4f41-98f5-26852a68d8c7_Facture_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770009447\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LINDA FAHEM\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEVENEV Gelles\", \"beneficiary_dob\": \"04-04-1970\", \"invoice_date\": \"18-01-2025\", \"security_number\": null, \"invoice_issuer\": \"LINDA FAHEM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique en cabinet\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/270a7f73-f7ee-4622-9b11-085dfeb43e3b_facture_osteo_07_02_2025_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"460001316\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Barbieri Stefano\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Voulyze Marc\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"164015935013723\", \"invoice_issuer\": \"BARBIERI Stefano\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fd8187e1-b9eb-4727-8041-9fc9e6f1affb_0556_001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"541057105\", \"rpps_number\": \"10002355963\", \"finess_number\": null, \"doctor_name\": \"Dr Olivier JAVELLE\", \"total_billed\": 15, \"bill_paid\": true, \"amount_paid\": 15, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GERMANN LIONEL\", \"beneficiary_dob\": \"19-10-1974\", \"invoice_date\": \"08-02-2025\", \"security_number\": \"174109301004926\", \"invoice_issuer\": \"Dr Olivier JAVELLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OSTEOPATHIE Cot\\u00e9e HN\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/635d32b5-0f6e-4c3e-8dca-8d1595039d0b_BAJARD_GREGORY_FACTURE__n20431_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 6.9, \"bill_paid\": true, \"amount_paid\": 6.9, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BAJARD GREGORY\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SELARL PHARMACIE DE LA TOUR RONDE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"TOPLEXIL SOL BUV S SUC FL150ML\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 6.9}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e8fd236d-45ef-4b02-a870-d29de5d98b83_NATIVEL_CHRISTELLE_29012025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"640008546\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Miriam LE GENISSEL\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NATIVEL CHRISTELLE\", \"beneficiary_dob\": \"26-04-1983\", \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Miriam LE GENISSEL - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6555a519-f9f4-4b6a-ae0d-fd491dcf9a96_Bidard_Dylan_facture_2025-02-08_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770010809\", \"rpps_number\": \"10010372562\", \"finess_number\": null, \"doctor_name\": \"Santamaria Ilona\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BIDARD DYLAN\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"El Santamaria Ilona\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/20f7572a-00e5-45f9-8812-e6d2037826ea_IMG_1767_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"740009873\", \"rpps_number\": \"10010722253\", \"finess_number\": null, \"doctor_name\": \"Eva BORDONARO\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laura BOURIGAULT\", \"beneficiary_dob\": \"05-11-1999\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Eva BORDONARO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/265728b3-aae9-4d6e-a505-cb00b3c7f5ee_2025-02-08-guillaume-legrain_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380001777\", \"rpps_number\": \"10010721552\", \"finess_number\": null, \"doctor_name\": \"Claire COURTY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEGRAIN Guillaume\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Claire COURTY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2d2088f3-6b10-44cf-ac12-5c3a9529613b_facture_osteo_GL_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380011557\", \"rpps_number\": \"10010721552\", \"finess_number\": null, \"doctor_name\": \"Claire COURTY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEGRAIN Guillaume\", \"beneficiary_dob\": null, \"invoice_date\": \"11-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Claire COURTY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b17619d6-7f32-427d-840b-75e30fbbb27b_20250208_180351_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310006705\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Th\\u00e9o GUERIN\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BRIGNON Olivier\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": \"174078313717397\", \"invoice_issuer\": \"Th\\u00e9o GUERIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7ae524fa-d82a-4a51-a670-cd6da912ff46_IMG_20250207_130257_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"547001156\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COUTURIER DAVID\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VERONIQUE DI GENNI\", \"beneficiary_dob\": null, \"invoice_date\": \"10-01-2025\", \"security_number\": \"265128902407415\", \"invoice_issuer\": \"DAVID COUTURIER (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/83f38474-d109-4edc-a395-0025e6259d96_Facture_250128_Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"870000254\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fran\\u00e7ois DOUSSAUD\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUBREUIL Eva\", \"beneficiary_dob\": null, \"invoice_date\": \"28-01-2025\", \"security_number\": \"203128708516324\", \"invoice_issuer\": \"Fran\\u00e7ois DOUSSAUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie de DUBREUIL Eva\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/24cab2a2-025c-4e67-96c7-07c214ac8340_facture_acquittee_Cordier_Mikael_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620001461\", \"rpps_number\": \"10006845688\", \"finess_number\": null, \"doctor_name\": \"Ludivine CACHERA-DEMAILLY E.I.\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CORDIER Mika\\u00ebl\", \"beneficiary_dob\": null, \"invoice_date\": \"28-01-2025\", \"security_number\": \"176065917810551\", \"invoice_issuer\": \"Ludivine CACHERA-DEMAILLY E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce0dfd61-9f50-4358-88bc-4f6692cddb20_2025-02-00018_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340020411\", \"rpps_number\": \"10010653813\", \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9ment GAUTI\\u00c9\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cynthia Sans\", \"beneficiary_dob\": \"10-06-1981\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9ment GAUTI\\u00c9 (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a9acd6ec-4874-44e7-af69-7cf642e01a22_2025-02-00024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340020411\", \"rpps_number\": \"10010653813\", \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9ment GAUTI\\u00c9\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alessandro Sans\", \"beneficiary_dob\": \"09-10-2010\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9ment GAUTI\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bad0f823-e7d4-41be-87a2-1634254db924_IMG_20250207_130207_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"540002714\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COURTIAL Pauline\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DI GENNI V\\u00e9ronique\", \"beneficiary_dob\": null, \"invoice_date\": \"28-11-2024\", \"security_number\": null, \"invoice_issuer\": \"COURTIAL Pauline\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"28-11-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce769ca9-924b-4355-bb1f-ce98c6a7049b_IMG_5741_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010354065\", \"finess_number\": null, \"doctor_name\": \"Rustant Camille\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BRIEN Juliette\", \"beneficiary_dob\": \"05-08-1996\", \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE Fanny HUDAULT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation adulte\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7bad16be-dff9-4b92-8041-4a9c38638e29_facturation_osteo-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690007729\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHOUDAR MOHAMED\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Denais Margaux\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CHOUDAR MOHAMED\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/48df624c-9ff3-4155-94b4-773bb00a967a_Facture_osteopathie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350007043\", \"rpps_number\": \"10010559762\", \"finess_number\": null, \"doctor_name\": \"Marine Briens\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Corentin JEGOU\", \"beneficiary_dob\": \"13-08-1993\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Marine Briens\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c6b4c0fd-7b33-462a-9e0d-5c39848389ae_Facture_HAMARTDamien_20250124KGKK_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"780015319\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OLSZAK CHARLOTTE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HAMART Damien\", \"beneficiary_dob\": \"01-04-1993\", \"invoice_date\": \"24-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET DE CHIROPRAXIE VERNOUILLET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi\", \"quantity\": 1, \"date_of_service\": \"24-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b6ecc678-8a40-4e52-bc31-09aab0782121_IMG_5670_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cyril VITE\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Devrue Constance\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cyril VITE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/189462d7-2c53-403e-b27f-156f210c29e0_Facture_osteo__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"570003681\", \"rpps_number\": \"10010130622\", \"finess_number\": null, \"doctor_name\": \"Clara BATTISTINI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rebois Thomas\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Clara BATTISTINI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5cc75e2d-0993-44d4-b4e8-901902e4f7d3_FACTURE_MORICELLY_Clara_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770011682\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PECHENART Claire\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MORICELLY Clara\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/04e123fd-f7d8-4c35-8256-c0e172d61f46_Osteopathe_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"840000681\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Magali FAVARO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ARMANGAU Thierry\", \"beneficiary_dob\": null, \"invoice_date\": \"30-05-2024\", \"security_number\": null, \"invoice_issuer\": \"Magali FAVARO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-05-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/127a26a9-785f-4059-ad4b-1000d1476ead_Stephane_BRETON_250207006_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950000075\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Stephane BRETON\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Florent Riche\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Stephane BRETON E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2d79f19d-3ec5-4cdf-a88b-040e733860fa_Osteopathe_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"840000881\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Magali FAVARO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ARMANGAU Thierry\", \"beneficiary_dob\": null, \"invoice_date\": \"05-07-2024\", \"security_number\": null, \"invoice_issuer\": \"Magali FAVARO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-07-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3300ad26-0cd1-496d-9ddb-11a9bae1bc9e_IMG_2554_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350003745\", \"rpps_number\": \"10010555489\", \"finess_number\": null, \"doctor_name\": \"LARSSON Alix\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chauchoy Michael\", \"beneficiary_dob\": \"04-02-1987\", \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie d'Antoine Corbel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie - Consultation du 27/01/2025\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6ee4cfd9-ce15-4a71-875a-051b09b89dc4_IMG_20250208_134358_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"80366180000042\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ARMAUBEC JULIE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Agimi Haeiet\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"ARMAUBEC JULIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/09b288ae-55d9-41ae-9f6e-f1cfc61e8130_facture_1588_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340001411\", \"rpps_number\": \"10010738473\", \"finess_number\": null, \"doctor_name\": \"Alexandre Viguier\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Perez Olivier\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Alexandre Viguier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/834a074c-811f-4d27-9c19-134a4b9ab2ea_FACTURE-25-02-208-GEORGES_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LAURA JUMIAUX\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Elo\\u00efse GEORGES\", \"beneficiary_dob\": \"24-05-1997\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET OZ - LAURA JUMIAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance Hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/588bd118-1770-4c2a-8d86-a6a6cbed79b2_osteopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"400001566\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amandine LAMY\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TEILLERA Amandine\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Amandine LAMY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bad7f9ef-347f-4ab6-b69d-c99a726769f2_20250208125718_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"710001652\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUPUIS Laura\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M Olivier Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"DUPUIS Laura\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7c2574b4-814d-46a5-aa87-a4402f46be9c_DAGHOU_Mustapha_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930007398\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine DELAFOY\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DAGHOU Mustapha\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"El Marine DELAFOY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/91e5ae14-79da-43f0-bf80-e0f8f3ce3abe_FACTURE-25-02-1283-BENSALEM_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COLINE ESCOFFIER\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NADEGE NADA BENSalem\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"COLINE ESCOFFIER - EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HYPNOSE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/550918c3-75ce-45bf-b291-1d08f5559afb_Facture_Osteo_du_15.01.2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"240001347\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Kevin Vanquickelberghe\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HUTEAU Fr\\u00e9d\\u00e9ric\", \"beneficiary_dob\": null, \"invoice_date\": \"15-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Kevin Vanquickelberghe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7d3ed5db-8ea9-48bb-870b-d67be0b7eb95_FACTURE_OSTEOPATHE_FEVRIER_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130010051\", \"rpps_number\": \"10010780806\", \"finess_number\": null, \"doctor_name\": \"Coralie DEDIEU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Andr\\u00e9a PASLAWSKI\", \"beneficiary_dob\": \"24-09-1976\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Coralie DEDIEU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4377eaa0-4f03-404e-86d3-fb386ae03826_facture_osteo_j3l_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330011131\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MEDALIN DIDIER EI\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LACAZE LABARRERE JEAN\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"MEDALIN DIDIER EI, OST\\u00c9OPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ed44bada-dfae-4ab1-b810-e47cec83e3d9_PXL_20250206_222803759.MP_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930001276\", \"rpps_number\": \"10010315413\", \"finess_number\": null, \"doctor_name\": \"AV\\u00c9DISSIAN Vasken\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marin Jordan\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"AV\\u00c9DISSIAN Vasken\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/98a1de7a-56cb-4a78-9416-4897bd412563_Screenshot_20250208_110655_com.google.android.apps.docs_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"800000564\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BRIGATTI Romain\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LECOMTE Jany\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": \"1760802691135\", \"invoice_issuer\": \"Romain BRIGATTI Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de LECOMTE Jany le 08/02/2025\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4ba5828c-76aa-45e9-a89f-1257021074c9_Claire_Osteo_20250207_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"020000774\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florent NAUD\\u00c9\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FURTAK Claire\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Florent NAUD\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eed202e3-72db-44e4-94ba-9ddc3bb95d4d_Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330018045\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne Leroux\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRIC GLEIZE\", \"beneficiary_dob\": null, \"invoice_date\": \"22-01-2025\", \"security_number\": \"173039202603014\", \"invoice_issuer\": \"H\\u00e9l\\u00e8ne Leroux\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/05040333-5a62-4f03-966b-d36c7e96203d_2025-02-07-nicolas-houdart_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010457660\", \"finess_number\": null, \"doctor_name\": \"Marion RAGONS\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HOUDART Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Marion RAGONS El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0924cb70-e199-46f7-a969-5ed7a4571b22_Osteo_sarah_05fevrier_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"840005847\", \"rpps_number\": \"10010800737\", \"finess_number\": null, \"doctor_name\": \"Laure Boutier\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme COUST\\u00c8S Sarah\", \"beneficiary_dob\": \"15-02-1992\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Boutier Laure\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation - 05/02/2025\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0308b7e0-8da0-4bfe-a578-57eb88e0511a_recu-fac-002296_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330004342\", \"rpps_number\": \"10010586088\", \"finess_number\": null, \"doctor_name\": \"Andurand C\\u00e9line\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIBIC Corentin\", \"beneficiary_dob\": \"28-12-1995\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE ANDURAND CELINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie Adulte - 07/02/2025\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a2da2013-8b36-453d-b6cc-d9c8d3336947_Osteo_janvier_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950004119\", \"rpps_number\": \"10010332350\", \"finess_number\": null, \"doctor_name\": \"Allison GUYEN-GUDLIN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLEURY Na\\u00eblacha\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"293059306425945\", \"invoice_issuer\": \"Mme Allison GUYEN-GUDLIN EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8a2d6396-0bf5-4603-9e0a-ee7722792bbd_IMG_2982_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"920016680\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathis COMMEYRAS\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAUGEROTTE Maximilien\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mathis COMMEYRAS Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b3308bb4-484d-4e7a-80c0-ce0b6e15b9a6_1739005481515802910962240438266_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"160000303\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DEPEYRAS Jean-Christophe\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAUFET Ambre\", \"beneficiary_dob\": null, \"invoice_date\": \"23-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie DEPEYRAS Jean-Christophe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-12-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2b913bc6-1824-496b-a8fb-6bcb12cff419_IMAG0002_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770003382\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Am\\u00e9lie FORTIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LECUYER Alexandra\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Am\\u00e9lie FORTIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a337feee-f8d9-4f6f-91e5-382b10999080_facture_tissier_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SYLVIE TISSIER\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"D'ANTONIO LIONEL\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SYLVIE TISSIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Hypose - 04/02/24\", \"quantity\": 1, \"date_of_service\": \"04-02-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c64d0b90-f96f-432c-8ad7-52567dd9efc4_DRAPERI_Delphine_734_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010805769\", \"finess_number\": null, \"doctor_name\": \"Eddy HADDAD\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Delphine DRAPERI\", \"beneficiary_dob\": null, \"invoice_date\": \"26-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Eddy HADDAD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/10456d78-f2da-4296-bd80-35176e9be1df_2025-02-08_09-05_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"260002274\", \"rpps_number\": \"10010688256\", \"finess_number\": null, \"doctor_name\": \"PHILIPPE C\\u00e9cile\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BROT La\\u00ebtitia\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": \"291070732402968\", \"invoice_issuer\": \"PHILIPPE C\\u00e9cile\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/94f495e7-f601-4213-b406-f4071faae35f_Facture_OSteo_06-02-25_KOCH_Frederic_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780012365\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Izzillo Quentin\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KOCH Frederic\", \"beneficiary_dob\": \"25-04-1975\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Maison M\\u00e9dicale Mareil\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LA QUEUE LES YVELINES\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/502e402e-ab98-4ac1-b19c-167cbb86645b_brotons_stephane_facture_2025-02-06_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"830807662\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Axelle MICHEL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BROTONS Stephane\", \"beneficiary_dob\": \"24-02-1973\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET CHIROPRATIQUE VALGORA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance effectu\\u00e9e\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/16e05f44-e504-483f-99ac-65691a7f95b7_IMG_20250208_074424411_1_1_scale_0.4.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00f4me CUSSAT\", \"total_billed\": 30, \"bill_paid\": true, \"amount_paid\": 30, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAGNAT Julien\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"J\\u00e9r\\u00f4me CUSSAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'hypoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e2092848-ae0d-4f5b-90a1-e1866d5446be_Screenshot_20250207_204026_OneDrive_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690009469\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CALATAYUD Ana\\u00efs\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAIN\\u00c7ON Habtamua\", \"beneficiary_dob\": \"12-09-1989\", \"invoice_date\": \"03-12-2024\", \"security_number\": \"2890999315031\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie de Sathonay-Camp\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/728bd294-3373-4304-ab58-376379f51c8d_recu-fac-1147_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010803095\", \"finess_number\": null, \"doctor_name\": \"Tommy Untereiner\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Santoro Gianni\", \"beneficiary_dob\": \"30-07-1988\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Tommy Untereiner\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation adulte - 07/02/2025\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7b4e09f4-e628-4c0e-9a91-3ffa1b9c3288_Screenshot_20250207_204046_OneDrive_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690009469\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CALATAYUD Ana\\u00efs\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAIN\\u00c7ON Habtamua\", \"beneficiary_dob\": \"12-09-1989\", \"invoice_date\": \"10-01-2025\", \"security_number\": \"2890999315031\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie de Sathonay-Camp\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4ef6454c-e7e6-4b35-9e66-f41d56027d94_Facture_MARCIN_Amandine0702_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910008135\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Justine NO\\u00cbL\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARCIN Amandine\", \"beneficiary_dob\": \"17-04-1993\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Justine NO\\u00cbL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7f149e5d-fb1f-42d4-bb3d-f4a2029f409b_IMG_20250207_232242_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770006120\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vincent TURLIN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emmanuel Lelong\", \"beneficiary_dob\": null, \"invoice_date\": \"28-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Turlin Vincent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4db4b4db-54c8-4918-af00-085ad7fc5353_recu-fac-2025-0161_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010318185\", \"finess_number\": null, \"doctor_name\": \"DE CARVALHO Bruno\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHEA Christian\", \"beneficiary_dob\": \"06-01-1984\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"DE CARVALHO Bruno\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie - 03/02/2025\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9ff7b717-9b1a-4bbe-9ac0-7575533eb17a_Facture_CORTARAEvie_20250205IIES_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"830009734\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \" Paul Bourelle\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nourrisson CORTARA Evie\", \"beneficiary_dob\": \"09-12-2024\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mr Paul Bourelle Chiropracteur\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Prestation\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/05c2ef83-54cb-419e-949f-74f898b459bd_osteo_13112024_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"917323032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alain FAUCHEREAU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LESTRA Chantal\", \"beneficiary_dob\": null, \"invoice_date\": \"13-11-2024\", \"security_number\": null, \"invoice_issuer\": \"Alain FAUCHEREAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins\", \"quantity\": null, \"date_of_service\": \"13-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f7df49da-2cf8-4584-8306-0d10ea1994a3_Melot_Quentin_facture_2025-02-07_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"920012093\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas NEULAS\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. MELOT Quentin\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Thomas NEULAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/53732e2e-23a5-44fb-85d5-df257de74d93_IMG_9627_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010317443\", \"finess_number\": null, \"doctor_name\": \"LALOU BENJAMIN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DAGORN Roman\", \"beneficiary_dob\": null, \"invoice_date\": \"21-01-2025\", \"security_number\": null, \"invoice_issuer\": \"LALOU BENJAMIN - OSTEOPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0806796f-858e-426d-b790-239a0be24aa2_Facture_Justin_renaud_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060014776\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Hourcastagnou Jacques\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Justin Renaud\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BELHARRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1bb42dc7-7ee0-45fe-aed7-68192d875df3_IMG_20250207_193157_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"541028890\", \"rpps_number\": \"10101871696\", \"finess_number\": null, \"doctor_name\": \"Benjamin FRESSE\", \"total_billed\": 15, \"bill_paid\": true, \"amount_paid\": 15, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GFELLER Marian\", \"beneficiary_dob\": \"27-04-1998\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Dr Benjamin FRESSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 15}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3374a3a0-4ff8-4056-957f-8fedca1cd314_17389546040733582030933880599903_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"210000048\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SPINNEWEBER Mathieu\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ALLEMEERSCH Virginie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": \"280081038720429\", \"invoice_issuer\": \"SPINNEWEBER Mathieu\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a9a59dee-6d84-481f-bfec-e6a6232a5d84_Screenshot_2025-02-07-19-27-03-805_com.adobe.reader-edit_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910005404\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manon SANSONI\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEMAIRE Marjorie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Manon SANSONI - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8794ba0a-5d58-4d01-a1be-71f7d5c4bfe2_IMG_20250207_185936_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"470000357\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Beno\\u00eet CAMPAGNOL\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Tony MORA\", \"beneficiary_dob\": null, \"invoice_date\": \"11-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Beno\\u00eet CAMPAGNOL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": 1, \"date_of_service\": \"11-12-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/778f559f-f1d7-4d40-86b4-f339223fcd31_2500107-dencausse-bernard-131564939567a629892b741_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930002092\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sophie Rossini\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bernard DENCAUSSE\", \"beneficiary_dob\": \"11-05-1962\", \"invoice_date\": null, \"security_number\": \"162053306312503\", \"invoice_issuer\": \"Sophie Rossini\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2b3ed78c-1a3c-40b3-b997-0e74f6ba497f_FENOLLAR_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"250003134\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HANS Xavier\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FENOLLAR Thierry\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Clinique Ost\\u00e9opathique Besan\\u00e7on\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cf4fa87b-649f-4c6b-8a70-3ce5755256fa_Attestation_20250205104300934_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"130008436\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas NOLIN D.C.\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOUTIGNY Philippe\", \"beneficiary_dob\": \"30-05-1964\", \"invoice_date\": \"05-02-2025\", \"security_number\": \"164057501009107\", \"invoice_issuer\": \"CENTRE CHIROPRATIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/44b08a09-fbcf-4765-a8ea-01b9c9a96076_ostheo_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780010328\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christ Arthur\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DISCHER Corinne\", \"beneficiary_dob\": null, \"invoice_date\": \"01-10-2024\", \"security_number\": null, \"invoice_issuer\": \"Christ Arthur\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-10-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a028d21a-396a-4259-89a9-cbbf390d4bc4_IMG_20250207_170536_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"517056578\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Flavolane Robert\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivier ALEXANDRE\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"FLAVOLANE ROBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"suivi d'acte d\\u00e9conventionn\\u00e9 de th\\u00e9rapie manuelle (ost\\u00e9opathie)\", \"quantity\": 1, \"date_of_service\": \"05-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a0d87d8e-3531-4958-a5e6-e61ccb342f63_OSTHEO_HUART_AURELIE_06022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330000654\", \"rpps_number\": \"10010580693\", \"finess_number\": null, \"doctor_name\": \"Marion L\\u00c9T\\u00c9\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HUART Aur\\u00e9lie\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Marion L\\u00c9T\\u00c9 (E.I.)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/926fb738-5dcd-4f49-9d01-9660084ef8f4_Facture_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380003459\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HERSCHENDORFER Axel\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROUZAUD Florian\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Axel HERSCHENDORFER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de ROUZAUD Florian le 27/01/2025\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce713030-9b73-491a-aff5-3ac7f59813d5_2025-02-07___PARRENIN_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"rpps_number\": \"10109842699\", \"doctor_name\": \"Stephan PARRENIN\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SASU Stephan PARRENIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soin de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Soin de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"17-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"beneficiary_name\": \"JACQUARD Chantal\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9b6fc1fe-1dee-498e-a7cf-6e8470364895_Facture_FA-7499-000431_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amandine Delhomme\", \"total_billed\": 40, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ana\\u00e9 LARS VAYSSIERE\", \"beneficiary_dob\": \"12-03-2016\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"El Amandine Delhomme\", \"currency\": \"EUR\", \"items\": [{\"description\": \"3 f\\u00e9vrier 2025 \\u2022 Soin enfant -10 ans\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0162e947-354e-465a-889c-729ecf1222d8_Fannene_David_facture_2025-02-07_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830011011\", \"rpps_number\": \"10010804655\", \"finess_number\": null, \"doctor_name\": \"Thibault Lugand\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FANNENE DAVID\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Thibault Lugand\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0aeb60b0-d507-4496-b596-e0456aef01ec_20250131_Ambre_Kirschner_honoraires_sophrologie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lie Ingrand\", \"total_billed\": 70, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ambre Kirschner\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Aur\\u00e9lie Ingrand EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de sophrologie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e5200778-8114-415c-8304-539e8e8876a0_CamScanner_07-02-2025_11.49_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330014507\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gilles GRAVELLIER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alain MILLAT\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Monsieur Gilles GRAVELLIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0d7eaf2f-8f69-49ed-8058-04811a3b3b38_Adobe_Scan_7_fevr._2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"769319336\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LEDR SANDER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAFFEIS MAUD\", \"beneficiary_dob\": \"17-12-1974\", \"invoice_date\": \"07-02-2025\", \"security_number\": \"274125917008270\", \"invoice_issuer\": \"LEDR SANDER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychoth\\u00e9rapie - hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2b987e55-9575-4a74-a557-94540c2a7548_Scan_CPN_STL_Siat_20250207161910_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"690020383\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Oriane DJEDID\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Detemznn Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"24-10-2024\", \"security_number\": \"1620668224087\", \"invoice_issuer\": \"Oriane DJEDID D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) de chiropratique\", \"quantity\": 1, \"date_of_service\": \"24-10-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/90196d01-d626-4a92-b1ff-dd5e0c910b3a_2025-02-00120_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"250003555\", \"rpps_number\": \"10010434875\", \"finess_number\": null, \"doctor_name\": \"In\\u00e8s LAATIAOUI\", \"total_billed\": 42, \"bill_paid\": true, \"amount_paid\": 42, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Killian BOUGUETOCH\", \"beneficiary_dob\": \"21-12-1999\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"La Villa Curea\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 42}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f894f446-28ce-491b-b6fb-f138c6406def_Scan_CPN_STL_Siat_20250207161822_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"690020383\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Oriane DJEDID\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dietemann Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"23-08-2024\", \"security_number\": \"1620668224087\", \"invoice_issuer\": \"Oriane DJEDID D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) de chiropratique\", \"quantity\": 1, \"date_of_service\": \"23-08-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f0f3289d-b018-4f81-b1d9-a2335b105d18_IMG_0857_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930003512\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine MISSONNIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HAUGEL Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"10-01-2025\", \"security_number\": null, \"invoice_issuer\": \"70 ESPACE SANT\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"intervention(s) en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d3c47fa9-c64c-4020-a887-77cb4f26fa2a_IMG_0856_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930003512\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine MISSONNIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HAUGUEL Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"03-01-2025\", \"security_number\": null, \"invoice_issuer\": \"70 ESPACE SANT\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"intervention(s) en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/aa50cf96-f4cb-4848-a392-99daa5efa987_Facture_ZINT_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010265626\", \"finess_number\": null, \"doctor_name\": \"FRAN\\u00c7OIS Marion\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ZINT Alexis\", \"beneficiary_dob\": \"24-12-1993\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"FRAN\\u00c7OIS Marion E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathic consultation\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/29d72de2-da83-483a-8702-939fbe4addfa_20250207_112449_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"0249104137\", \"rpps_number\": \"10010530284\", \"finess_number\": null, \"doctor_name\": \"Laure Mazerat\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Stephanie Cupit\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Laure Mazerat Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HONORAIRES\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6d9a0fdf-c334-43b6-946b-45b1b1ab83d7_Facture_osteo_FEV_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910009935\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lucas Margonty\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Kengadaran C\\u00e9line\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lucas Margonty\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0aa44c3d-2dc8-47fc-9307-d1485df454c1_20250207152016_001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690010798\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Antoine DACHEVILLE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIFFARD Julien\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Antoine DACHEVILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Note d'honoraires en Ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4f520f07-40b1-41a0-b3eb-0669e69dcda8_FACTURE_RICHETIER_A_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"240002675\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Adrien Unia\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RICHETIER Alexandra\", \"beneficiary_dob\": \"26-11-1994\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Adrien Unia Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3e214583-2362-4d68-a96e-ac34df3a2bb7_osteo_ju_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060012358\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florian RUSCH\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JULIEN Lai\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"RUSCH Florian\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5df53e4c-13e5-4c12-91a2-24236c5cfa5f_IMG_20250207_154557_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000321\", \"rpps_number\": \"10010306503\", \"finess_number\": null, \"doctor_name\": \"LETORD Philippe\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TRODOUX Davy\", \"beneficiary_dob\": \"14-03-1979\", \"invoice_date\": \"06-02-2025\", \"security_number\": \"1790308480023\", \"invoice_issuer\": \"LETORD Philippe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/03c955b1-c201-4ec2-b5e0-42a11e04908f_facture_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130003296\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phane MADIGNIER\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROBERT JEAN\", \"beneficiary_dob\": null, \"invoice_date\": \"09-02-2025\", \"security_number\": \"191093148822116\", \"invoice_issuer\": \"St\\u00e9phane MADIGNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d1d62a23-a37a-4bae-b6f5-a28a137ef0af_RECTIFY_IMG_20250207_153454_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"547001156\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COUTURIER DAVID\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MICKAEL ENGELMANN\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": \"197065439572518\", \"invoice_issuer\": \"DAVID COUTURIER (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f1313200-92c8-4b7d-93e9-c010275f811c_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780014999\", \"rpps_number\": \"10010379864\", \"finess_number\": null, \"doctor_name\": \"Maxime BOURGET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"St\\u00e9phane CHEVRE\", \"beneficiary_dob\": \"19-06-1973\", \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Maxime BOURGET (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b685d5dd-e01e-4fa9-b7aa-17abd1a25b74_IMG_5544_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930000070\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dan CHEKROUN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HODONOU Cherylanne\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Dan CHEKROUN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/13388e62-739c-4338-9b78-eb1c67fc740f_Facture_Osteo_02.25_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/13388e62-739c-4338-9b78-eb1c67fc740f_Facture_Osteo_02.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590008553\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Antoine BOCQUET\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M HAAGE Julien\", \"beneficiary_dob\": \"20-09-1980\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Antoine BOCQUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/74ccd12a-e2c5-4995-99a9-16326c3fdd69_Facture_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910004142\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ODIARDO Rudy\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"D\\u00c9SIR\\u00c9E Kathy\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Rudy Odiardo\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de D\\u00c9SIR\\u00c9E Kathy\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2f042a0e-e23e-4e4a-af6d-9b013f1509bd_facture_ostheo_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"211035902\", \"rpps_number\": \"10002151594\", \"finess_number\": null, \"doctor_name\": \"Corinne Galland\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Christophe AUCOUTURIER\", \"beneficiary_dob\": \"30-05-1972\", \"invoice_date\": \"23-01-2025\", \"security_number\": \"172056330005406\", \"invoice_issuer\": \"Docteur Corinne Galland\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SCEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1709a87a-d859-41e8-93da-5f307e1677e8_facture_ostheo_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"211035902\", \"rpps_number\": \"10002151594\", \"finess_number\": null, \"doctor_name\": \"Corinne Galland\", \"total_billed\": 53, \"bill_paid\": true, \"amount_paid\": 53, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Christophe AUCOUTURIER\", \"beneficiary_dob\": \"30-05-1972\", \"invoice_date\": \"26-12-2024\", \"security_number\": \"172056330005406\", \"invoice_issuer\": \"Docteur Corinne Galland\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-12-2024\", \"mandatory_coverage\": null, \"amount\": 53}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4b0d7324-9a46-4f2c-872b-4b9c06cb0566_595570692474790_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620007039\", \"rpps_number\": \"10010189842\", \"finess_number\": null, \"doctor_name\": \"VALEMBOIS Paul\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AMOUREUX Thomas\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"VALEMBOIS Paul\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3be4a9ab-b1e7-4567-b3c2-8f330829bff4_FACTURE_KINESIOLOGUE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire Pascal\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"ClairEnerg\\u00e9tique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soin \\u00e9nerg\\u00e9tique en cabinet\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/07243018-c418-446f-8dca-307fc443b872_FACTURE_ETIOPATHE_5_FEVRIER_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gaylord SANTONI\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Da Silva Reis Lydie\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"261017867004293\", \"invoice_issuer\": \"Gaylord SANTONI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6336c949-83db-496b-aa71-47579a0c85d7_FA_REFLEXOLOGIE_1_25-1651_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VERGNET Val\\u00e9rie\", \"total_billed\": 25, \"bill_paid\": true, \"amount_paid\": 25, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHARRI\\u00c9 Florence\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"VERGNET Val\\u00e9rie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"R\\u00e9flexologie soins de support\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f40d1b67-7690-4d39-8578-eca04cfd2b3c_Factures_Ethiopathe_Dimitri_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f40d1b67-7690-4d39-8578-eca04cfd2b3c_Factures_Ethiopathe_Dimitri_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Etiopathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manon DABOS\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nouhaud Dimitri\", \"beneficiary_dob\": null, \"invoice_date\": \"11-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Manon DABOS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'etiopathie \\u00e0 Caudecoste\", \"quantity\": 1, \"date_of_service\": \"11-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}, {\"is_bill\": true, \"profession\": \"Etiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manon DABOS\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nouhaud Dimitri\", \"beneficiary_dob\": null, \"invoice_date\": \"25-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Manon DABOS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'etiopathie \\u00e0 Caudecoste\", \"quantity\": 1, \"date_of_service\": \"25-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/072fc9e8-d2ba-4bc7-9569-712845984ff8_Facture_ostheo_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/072fc9e8-d2ba-4bc7-9569-712845984ff8_Facture_ostheo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"740006895\", \"rpps_number\": \"10010098662\", \"finess_number\": null, \"doctor_name\": \"Marion CREPIN\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Le Darz C\\u00e9dric\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Marion CREPIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1e924563-2873-4ac4-aab4-444d155b9420_20250207_140030_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"930006697\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"L\\u00e9a Huygues-Beaufond\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cucchi Benjamin\", \"beneficiary_dob\": null, \"invoice_date\": \"01-02-2025\", \"security_number\": null, \"invoice_issuer\": \"L\\u00e9a Huygues-Beaufond\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/831984b8-6c87-4f5d-b0a0-2d0ab5cc9061_20250207_140107_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"600001812\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Margaux Gonzalez\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cucchi Benjamin\", \"beneficiary_dob\": null, \"invoice_date\": \"14-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Margaux Gonzalez\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-12-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/521f9458-85d3-42b9-9468-6c1fe0257216_1738931285265_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": \"590006565\", \"rpps_number\": \"10005347728\", \"finess_number\": null, \"doctor_name\": \"Nathana\\u00ebl MONFORT\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RINGEVAL Emeric\", \"beneficiary_dob\": null, \"invoice_date\": \"13-06-2024\", \"security_number\": \"100115932912185\", \"invoice_issuer\": \"Mr Nathana\\u00ebl MONFORT EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"13-06-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0ba9710d-4d19-495c-b908-63c10016ce17_1738931285320_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": \"590006565\", \"rpps_number\": \"10005347728\", \"finess_number\": null, \"doctor_name\": \"Nathana\\u00ebl MONFORT\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RINGEVAL Emeric\", \"beneficiary_dob\": null, \"invoice_date\": \"18-03-2024\", \"security_number\": \"100115932912185\", \"invoice_issuer\": \"Mr Nathana\\u00ebl MONFORT El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"18-03-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f54d4617-f42c-42a5-a0ad-43dadb2f93d8_Osteopathie_facture__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620004549\", \"rpps_number\": \"10010468394\", \"finess_number\": null, \"doctor_name\": \"Alice Rogalski\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SELLIEZ Alexis\", \"beneficiary_dob\": null, \"invoice_date\": \"17-12-2024\", \"security_number\": null, \"invoice_issuer\": \"EI ALICE ROGALSKI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-12-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/27f11c77-a849-4aac-8d53-f900c540bc72_osteopathie_tanguy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350000485\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry Buridant\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fabrice Tanguy\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Thierry Buridant\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie et de massage r\\u00e9flexe du tissu conjonctif\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2aba1d07-fcfa-4bb1-bc22-5040e91f47a0_1738931285365_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620003814\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J.STANKO\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RINGEVAL Emeric\", \"beneficiary_dob\": null, \"invoice_date\": \"23-05-2024\", \"security_number\": null, \"invoice_issuer\": \"J.STANKO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-05-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5c719322-2486-428c-8c7e-1441497877cc_scan_tc_2025-02-07-13-36-23_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010712247\", \"finess_number\": null, \"doctor_name\": \"AXEL GUILLIEY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COMARKEY RON Thierry\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": \"1690242044060\", \"invoice_issuer\": \"AXEL GUILLIEY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d783daf8-282e-4860-af41-904aa36f5405_PXL_20250207_1233101042_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"340010883\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Leslie ROBERT\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Grigis Tiffany\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Leslie ROBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1d7b21c7-12f2-475d-aacd-a4b6e48c3291_osteopathe_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590005229\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benjamin Rudent\", \"total_billed\": 63, \"bill_paid\": true, \"amount_paid\": 63, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Poujol Julie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Benjamin Rudent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 63}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2992af58-6afb-4d25-9071-f2ad3ac22662_OSTEO_12.23_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"290002369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Kevin LABOUS\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HUE Didier\", \"beneficiary_dob\": null, \"invoice_date\": \"13-12-2023\", \"security_number\": null, \"invoice_issuer\": \"Kevin LABOUS - Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/74654024-6047-4a51-9640-1d88b7702907_OSTEO_01.24_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830004560\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Beno\\u00eet MARTIN\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Kue Didier\", \"beneficiary_dob\": null, \"invoice_date\": \"26-01-2024\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE Benoit MARTIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation \\u00e0 Mr / Mme : Kue Didier dans le cadre de soins d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-01-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/34918269-15fd-4e3d-9180-659530a82d35_OSTEO_01.25_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"850003625\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benjamin PERREAUX\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"M. HUE Didier\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"03-01-2024\", \"security_number\": \"1510575012120\", \"invoice_issuer\": \"Benjamin PERREAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-01-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/511f4232-e9d9-47e2-b8b4-d029f8dc1e6e_2025-01-23_OSTEO_ALCOCK_FAC-2170_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"530000538\", \"rpps_number\": \"10010528270\", \"finess_number\": null, \"doctor_name\": \"ALCOCK David\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Legeay Edwige\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"SARL OSTEO ALCOCK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie - adulte - Consultation du 23/01/2025\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e05ac9a1-6890-45a8-9847-e2efdd2b50a5_FACT_OSTEO_4_FEVRIER_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"840000608\", \"rpps_number\": \"10010763992\", \"finess_number\": null, \"doctor_name\": \"Pierre Vene\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Hakem Karim\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"1760206088056\", \"invoice_issuer\": \"Pierre Vene\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a682cca0-0df7-4f71-b366-7525e73fb322_IMG_20250207_115651_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"630001535\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"POINCEAUX Joris\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. DRUOSCAT Julien\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"POINCEAUX Joris\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d5a8f5e5-7755-4791-910a-9c4079fa07f4_Facture_osteopathe_Madame_Gregoire_290125_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"440007078\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vincent Joyau\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GREGOIRE St\\u00e9phane\", \"beneficiary_dob\": \"24-02-1978\", \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Vincent Joyau\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/98ac502e-3415-4a70-bdf6-676be5448390_Mme_GIBERT_Camille_E.I._1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380004200\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GIBERT Camille\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAVADIAU Ambre\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": \"204054214705901\", \"invoice_issuer\": \"Mme GIBERT Camille E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4969fd33-c173-4ae5-8020-28985599da71_17389252341351391827234693879142_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620006866\", \"rpps_number\": \"10010475845\", \"finess_number\": null, \"doctor_name\": \"Caroline ADANCOURT\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Helloboid Charles\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Caroline ADANCOURT Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fcd6e41f-3d4c-43c8-a6f7-85e7ea5bda1d_IMG_7926_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"500002753\", \"rpps_number\": \"10010114857\", \"finess_number\": null, \"doctor_name\": \"Agathe Bel\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Xavier FAUDEMER\", \"beneficiary_dob\": \"24-11-1978\", \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Agathe Bel - EI - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3116513f-22b1-4143-8f0b-97527093a9cb_Facture_osteo_fevrier_2024_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3116513f-22b1-4143-8f0b-97527093a9cb_Facture_osteo_fevrier_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590008553\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Antoine BOCQUET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M Julien HAAGE\", \"beneficiary_dob\": \"20-09-1980\", \"invoice_date\": \"13-02-2024\", \"security_number\": null, \"invoice_issuer\": \"Antoine BOCQUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"13-02-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ae646ace-dc44-4377-a250-1c2a9fcbc011_sophrologie3_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SYLVIE JOLIMAY\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivier MOTLIN\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"SYLVIE JOLIMAY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Sophrologie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8588f081-a48b-4676-90e1-cabcd66a74ad_IMG_20250207_110801_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910001528\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jacques GRECO\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Bouenane Pandla\", \"insured_dob\": null, \"beneficiary_name\": \"Bouenane Pandla\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": \"293079206325640\", \"invoice_issuer\": \"Jacques GRECO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d0ed8e34-ae02-4aa0-9988-2c0b48ed94f7_20250207_110505_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130017189\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Olivier CHABANON\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M BUHLER STEPHANE\", \"beneficiary_dob\": \"10-10-1974\", \"invoice_date\": \"07-02-2025\", \"security_number\": \"174108212105789\", \"invoice_issuer\": \"Olivier CHABANON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"2025-02-07\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/75f7652c-7e29-494f-a02f-dd2e2c47c9e0_CamScanner_06-02-2025_15_32_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"380001602\", \"rpps_number\": \"10010676251\", \"finess_number\": null, \"doctor_name\": \"Sophie Nikitine\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAULLE Maelle\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Nikitine Sophie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lien M\\u00e9canique Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7b4bd5d9-32b1-40ba-9d72-d8e80ff16655_FACTURE_OSTEOPATHE_DU_06_FEVRIER_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"450003660\", \"rpps_number\": \"10010144706\", \"finess_number\": null, \"doctor_name\": \"Alexandra COQUARD\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JACOB Karine\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fb001950-6f74-4159-826f-07f1ba5929c4_Facture_2025_LEFAIT_Aurelie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"190000760\", \"rpps_number\": \"10010234762\", \"finess_number\": null, \"doctor_name\": \"Margaux BOVIS\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Aur\\u00e9lie LEFAIT\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"284086212608177\", \"invoice_issuer\": \"Margaux BOVIS El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie Adulte\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/847603bb-2553-4d83-ace0-2b6ff79fa244_facture_osteo_06-02-2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"490004108\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"David DA COSTA OLIVEIRA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEMARCQ ALICE\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie David DA COSTA OLIVEIRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b966fb26-8754-4714-b795-78ef7e1116d0_facture_ostheopate_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590005229\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benjamin Rudent\", \"total_billed\": 63, \"bill_paid\": true, \"amount_paid\": 63, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Poujol Julie\", \"beneficiary_dob\": \"31-12-1984\", \"invoice_date\": \"21-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Benjamin Rudent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-01-2025\", \"mandatory_coverage\": null, \"amount\": 63}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a87c1caa-f6cb-4fbe-a460-8f95e76217c8_IMG_20250207_100001_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780012456\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Romuald GUERBET\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Tetiana PAVILLA\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Romuald GUERBET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/141e5445-10ab-400b-81a3-001132ec0783_ROSSIGNOL_Laurine_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780012092\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ROSSIGNOL Laurine\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUIBERT Garance\", \"beneficiary_dob\": null, \"invoice_date\": \"26-01-2024\", \"security_number\": null, \"invoice_issuer\": \"ROSSIGNOL Laurine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-01-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/05bd3d4e-4df3-4bc6-8543-39d307433f24_OSTEO_DU_6_FEV_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"050002005\", \"rpps_number\": \"10010272218\", \"finess_number\": null, \"doctor_name\": \"Tom NAVARRO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ZUCARO C\\u00e9line\", \"beneficiary_dob\": \"26-04-1983\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de NAVARRO Tom Ost\\u00e9opathe DO MROF\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie - Consultation du 06/02/2025\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/22a87bee-a05c-4605-afed-9e83d6a6e379_Scan092229_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830003141\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bethancourt Marjorie\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DAURAT Jean Philippe\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Bethancourt Marjorie Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f79a6aa9-72e3-4583-b2cd-ce872fc6aa2e_2500042-boust-virginie-304654533679e0a1e8a4a6_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350010039\", \"rpps_number\": \"10010145877\", \"finess_number\": null, \"doctor_name\": \"Augustin Bertaux\", \"total_billed\": 62, \"bill_paid\": true, \"amount_paid\": 62, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Virginie BOUST\", \"beneficiary_dob\": \"09-04-1977\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Augustin Bertaux E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"01-02-2025\", \"mandatory_coverage\": null, \"amount\": 62}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/77645bbd-1bc5-4636-b6c9-1c64713d19b1_facture_Raphael_Braud__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"170004915\", \"rpps_number\": \"10010137965\", \"finess_number\": null, \"doctor_name\": \"Gerard Anne\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rapha\\u00ebl Braud\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Anne GERARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9496a72b-9c2c-4542-867f-9fc4dad457c1_IMG_20250207_084759_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"577016512\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BECKER FRANCK OLIVIER\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAST ADELINE\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": \"291076743722996\", \"invoice_issuer\": \"Franck-Olivier Becker\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2062d98b-529c-42d0-87bb-cc131ee36c1b_Factures_Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"490004108\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"David DA COSTA OLIVEIRA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAILLAUD SYLVIE\", \"beneficiary_dob\": \"28-06-1972\", \"invoice_date\": \"06-02-2025\", \"security_number\": \"272062227825484\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie David DA COSTA OLIVEIRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0cf25711-9367-4403-a804-ebf0a0ff9242_chasselat_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0cf25711-9367-4403-a804-ebf0a0ff9242_chasselat_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590010344\", \"rpps_number\": \"10002244795\", \"finess_number\": null, \"doctor_name\": \"Dr CHASSELAT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vincent DOUAT\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Dr CHASSELAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a3745377-917b-49b9-b9fc-3e65d22c5638_BERTIN_Franck_Facture_osteopathie_du_23_01_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"9D0004054\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HOUSSEN SARAH\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BERTIN FRANCK\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"HOUSSEN SARAH\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/419fd893-b8ef-404d-bc49-d637110e06ad_facture_ostheo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"2A0000329\", \"rpps_number\": \"10010811924\", \"finess_number\": null, \"doctor_name\": \"Vincent Chiappini\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emmanuelle Riehm\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": \"2800167447019\", \"invoice_issuer\": \"Vincent Chiappini\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation \\u00e0 mon cabinet\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6dd4176a-b1b8-4388-b8f6-bbeb1236cac3_Delcros_Jeremy_facture_2025-02-05_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750013716\", \"rpps_number\": \"10010327343\", \"finess_number\": null, \"doctor_name\": \"Charles JUILLARD\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELCROS Jeremy\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Charles JUILLARD E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultations d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e6214b1b-46c6-41cf-87b3-7dde7b58a53d_IMG_6880_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"840004345\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FERRE Caroline\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LUBRANO Ang\\u00e9lique\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"FERRE Caroline\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/afc3debe-914f-4fef-a3c0-6b156692fa37_IMG_2289_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"950001412\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emeline CAUFFET\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROUALIN Yorick\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Emeline CAUFFET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de ROUALIN Yorick\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/59e5e2db-b510-4d29-af84-25d2e26b8c01_Facture_BARBARETDylan_20250116GMSI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"600001358\", \"rpps_number\": \"10010115466\", \"finess_number\": null, \"doctor_name\": \"B\\u00e9n\\u00e9dicte BOYER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M BARBARET Dylan\", \"beneficiary_dob\": \"01-07-1995\", \"invoice_date\": \"16-01-2025\", \"security_number\": null, \"invoice_issuer\": \"B\\u00e9n\\u00e9dicte BOYER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de chiropraxie\", \"quantity\": 1, \"date_of_service\": \"16-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/55108672-490f-4845-91f7-3b58aa0d06b6_Facture_acquittee_06022025_consultation_d_osteopathie_0001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"160001855\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elanais LE NAOUR\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEBEAULIEU Madeleine\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Elanais LE NAOUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/be55976f-8187-46d5-a285-671217d436ac_IMG_4005_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940010028\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jacques EYSSAUTIER\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NUBEE Lina\", \"beneficiary_dob\": \"16-10-1999\", \"invoice_date\": \"04-02-2025\", \"security_number\": \"299109304505325\", \"invoice_issuer\": \"Jacques EYSSAUTIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4c53616e-7da0-4c6b-9d89-b645e80f1836_FA_25-1000_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIMENTA Julie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAURIAT \\u00e9pouse Yhuel Chrsitine\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"PIMENTA Julie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de r\\u00e9flexologie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/59c82220-6ed8-4ea6-937a-b40c106e7f3b_Facture_osteo_02-2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780012340\", \"rpps_number\": \"10010361482\", \"finess_number\": null, \"doctor_name\": \"Estelle LATIL\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Guerric Guillaume\", \"beneficiary_dob\": \"20-08-1974\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Estelle LATIL (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d79dfb79-bfef-40db-bc98-f2af328e266e_facture_ostheo_1738873157473_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340013036\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laure MARCIQUET\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SANCHEZ V\\u00e9ronique\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Laure MARCIQUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e45ea3e1-95fe-4d22-acf4-8ed0a30d9dc6_IMG20250206200324_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurence KALTENEGGER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marine TRIOUX\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Laurence KALTENEGGER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'hypoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7f0110a2-87fa-40db-9d29-b89fbc21a5ac_Ostheo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750009839\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cohen J\\u00e9r\\u00e9mie\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Christophe Rodriguez\", \"beneficiary_dob\": \"26-03-1978\", \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Cohen J\\u00e9r\\u00e9mie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/57d909bd-743d-4b1b-a7d7-85734837078d_Facture_MorganeMousset_202503360_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"750026437\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Morgane Mousset\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MERCIER Mylene\", \"beneficiary_dob\": \"01-12-1986\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Paris 13\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fadd7aaa-0b0f-4f95-b66f-cd034f107ded_Facture_TICHIT_05022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010509577\", \"finess_number\": null, \"doctor_name\": \"Warez Pascal\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TICHIT Alexandre\", \"beneficiary_dob\": \"27-10-1976\", \"invoice_date\": \"05-02-2025\", \"security_number\": \"176109152110816\", \"invoice_issuer\": \"Centre d'ost\\u00e9opathie El Warez Pascal\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie - 05/02/2025\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/252c4ad4-8fa3-4ed1-9508-ee77f6dce6fe_img20250206_20040851_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"640008421\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille LABORDE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAMET Marie Pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Camille LABORDE D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins de chiropraxie\", \"quantity\": null, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e7466348-265c-4e0e-9b90-751c06443665_Facture_hypnose_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurence KALTENEGGER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marine TRIOUX\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Laurence KALTENEGGER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8c31bae4-5322-4f3b-ae00-d3dbd40446df_facture_Henry_Nadege_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Karen\", \"total_billed\": 45, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HENRY NADEGE\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Karen r\\u00e9flexo\", \"currency\": \"EUR\", \"items\": [{\"description\": \"R\\u00e9flexologie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/994f5940-fb0d-41ea-8180-2c18e7c8c7bf_2500095-roques-marie-josee-41893893467a4efea5f29b1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"310013016\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marion ROUSSEL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROQUES Marie-jos\\u00e9e\", \"beneficiary_dob\": \"12-07-1964\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Marion ROUSSEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b2c44160-0da2-442e-9263-de0627600376_osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830003869\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SYLVAIN MATHURIN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cordamoni Guillaume\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"SYLVAIN MATHURIN Ost\\u00e9opathe DO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8d5e5dfe-71a8-4a05-a376-8727cd264ffd_osteopathe_do_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8d5e5dfe-71a8-4a05-a376-8727cd264ffd_osteopathe_do_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"040000321\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emilie Nicolas\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"20-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Emilie Nicolas\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/80f7efea-27d7-4526-9d60-aaed0ddbc7f2_Reflexologie_060225_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"R\\u00e9flexologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CORZ\\u00c9AME\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cecile Dautel\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CORZ\\u00c9AME R\\u00e9flexologie Plantaire\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de r\\u00e9flexologie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ecf06014-6926-473d-8fbb-8d72da4e78a1_Screenshot_20250204_121347_Drive_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"600003172\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VANHOOREN Guillaume\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROUSSEAU Antoine\", \"beneficiary_dob\": \"11-02-1997\", \"invoice_date\": \"01-02-2025\", \"security_number\": null, \"invoice_issuer\": \"VANHOOREN Guillaume\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/884bce4e-ad37-4045-9db5-2f7832b13e2d_Honoraires_2025-02-01_00134_LE_GUISCHER_Maxime_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"910001791\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9dric MILOE\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE GUISCHER Maxime\", \"beneficiary_dob\": \"15-06-1999\", \"invoice_date\": \"01-02-2025\", \"security_number\": null, \"invoice_issuer\": \"C\\u00e9dric MILOE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"01-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/661c031f-1da5-4947-bf0e-3f5ae7267b99_20250206_173541_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"1001427135\", \"finess_number\": null, \"doctor_name\": \"Aubin Girard\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SACQUIER David\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"1750377305034\", \"invoice_issuer\": \"SFDO Syndicat Fran\\u00e7ais Des Ost\\u00e9pathes\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5440b8b5-5ea3-4a72-a8bc-23d578333d9b_Osteo_severine_2025_20250206_0001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"627022445\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Boquet J\\u00e9r\\u00e9mie\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Lejeune Severine\", \"insured_dob\": null, \"beneficiary_name\": \"Lejeune Severine\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Boquet J\\u00e9r\\u00e9mie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9pathie / posturologie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/71541c0f-7b7c-4283-a5e9-126d51459936_PIEDNOIR_Guillaume_OSTHEO__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330013293\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien Roux\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PIEDNOIR Guillaume\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Julien Roux\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1f5c9f90-eb0c-45aa-b809-e08a7c7593b8_SOPHRO_FACTURE_MME_POIROT_MAURICETTE_JANV_25_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1f5c9f90-eb0c-45aa-b809-e08a7c7593b8_SOPHRO_FACTURE_MME_POIROT_MAURICETTE_JANV_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phanie Mompl\\u00e9\", \"total_billed\": 100, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POIROT Mauricette\", \"beneficiary_dob\": null, \"invoice_date\": \"22-01-2024\", \"security_number\": null, \"invoice_issuer\": \"VIVRE SA RENAISSANCE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance(s) de sophrologie individuelle(s)\", \"quantity\": 1, \"date_of_service\": \"07-01-2024\", \"mandatory_coverage\": null, \"amount\": 20}, {\"description\": \"S\\u00e9ance(s) de sophrologie individuelle(s)\", \"quantity\": 1, \"date_of_service\": \"10-01-2024\", \"mandatory_coverage\": null, \"amount\": 20}, {\"description\": \"S\\u00e9ance(s) de sophrologie individuelle(s)\", \"quantity\": 1, \"date_of_service\": \"14-01-2024\", \"mandatory_coverage\": null, \"amount\": 20}, {\"description\": \"S\\u00e9ance(s) de sophrologie individuelle(s)\", \"quantity\": 1, \"date_of_service\": \"17-01-2024\", \"mandatory_coverage\": null, \"amount\": 20}, {\"description\": \"S\\u00e9ance(s) de sophrologie individuelle(s)\", \"quantity\": 1, \"date_of_service\": \"21-01-2024\", \"mandatory_coverage\": null, \"amount\": 20}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/02c6c398-ddf5-47e9-be88-a27742a2683c_recu-fac-2025-02-002979_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"820001915\", \"rpps_number\": \"10010642345\", \"finess_number\": null, \"doctor_name\": \"Farella Marie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Curbilie Bryan\", \"beneficiary_dob\": \"25-01-2002\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Farella Marie Ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation - 05/02/2025\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9bbd433a-7b8e-4e42-a864-93df74cc61ff_IMG_20250206_171140_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"590002200\", \"rpps_number\": \"10002255072\", \"finess_number\": null, \"doctor_name\": \"Pierre COUSIN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": 30, \"complementary_coverage\": null, \"client_part\": 35, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Johnced\", \"beneficiary_dob\": \"18-08-1973\", \"invoice_date\": \"06-02-2024\", \"security_number\": \"173085935045753\", \"invoice_issuer\": \"Docteur Pierre COUSIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale\", \"quantity\": 1, \"date_of_service\": \"06-02-2024\", \"mandatory_coverage\": 30, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4d8f8074-95e0-411a-b29d-25f1180fab97_Facture_Osteo_05.02.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010473766\", \"finess_number\": null, \"doctor_name\": \"Laureen FOURNIER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Priscilla Pinto\", \"beneficiary_dob\": \"30-08-1987\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Laureen FOURNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2d7ef988-f270-496b-a735-17a5f2adf181_Fact_OSTEO_6-2-25_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"720001510\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BEAUREPERE Pierre-Marie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SORT Lo\\u00efc\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BEAUREPERE Pierre-Marie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/946f9f6b-e52f-4829-8e09-2ce9ae1da317_2500086-jonghmans-clara-166238007867a48ead63272_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"730003720\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie Cadot\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Clara JONGHMANS\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Marie Cadot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": null, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a6321cee-55d4-42c4-be96-6b3a59d70908_facture_osteo_030225_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"620000158\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MORGAND Estelle\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DZUIBA Sylvie\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"MORGAND Estelle\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ffa6f7d2-f14a-4a66-bc63-69977fb71a7a_2_consult_hypnose__1_scale_0.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ffa6f7d2-f14a-4a66-bc63-69977fb71a7a_2_consult_hypnose__2_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. RIVIER Hugo\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. BURGEAUD Sylvain\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"M. RIVIER Hugo\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en Hypnose - Bien-etre\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}, {\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. RIVIER Hugo\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. BURGEAUD Sylvain\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"M. RIVIER Hugo\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en Hypnose - Bien-etre\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/749991f0-5f22-4b49-ad9d-457db1b5c472_facture_osteopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"281022186\", \"rpps_number\": \"10003781985\", \"finess_number\": null, \"doctor_name\": \"WOLFRUM L\\u00e9onardo\", \"total_billed\": 85, \"bill_paid\": true, \"amount_paid\": 85, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BAUDINAT YANN\", \"beneficiary_dob\": null, \"invoice_date\": \"07-02-2025\", \"security_number\": null, \"invoice_issuer\": \"DOCTEUR WOLFRUM L\\u00e9onardo\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 85}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/af206337-404d-4b97-9772-baae1962c3ee_IMG_2129_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"577011448\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fr\\u00e9d\\u00e9ric BOUR\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HERIZOG ZINS OSCAR\", \"beneficiary_dob\": null, \"invoice_date\": \"20-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Fr\\u00e9d\\u00e9ric BOUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte de MASSAGE THERAPEUTIQUE - Th\\u00e9rapie manuelle Ost\\u00e9opatique - Microkin\\u00e9sith\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"20-12-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ffc03321-4cf7-4e3e-9ab0-f24728818475_IMG_2128_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"577011448\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fr\\u00e9d\\u00e9ric BOUR\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HERIZOG Zins Eliott\", \"beneficiary_dob\": null, \"invoice_date\": \"20-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Fr\\u00e9d\\u00e9ric BOUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte de MASSAGE THERAPEUTIQUE - Th\\u00e9rapie manuelle Ost\\u00e9opatique - Microkin\\u00e9sith\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"20-12-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6d20c1cc-fa3b-41a9-990e-d115f2c1265b_Facture_MUNOZ_Florian_2025-0002-3_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6d20c1cc-fa3b-41a9-990e-d115f2c1265b_Facture_MUNOZ_Florian_2025-0002-3_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CLEMENT-HU Wenjing\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MUNOZ Florian\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CLEMENT HU Wenjing\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de la Sophrologie - TUI NA MASSAGE TRADITIONNEL CHINOIS\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}, {\"is_bill\": true, \"profession\": \"Sophrologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CLEMENT-HU Wenjing\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MUNOZ Florian\", \"beneficiary_dob\": null, \"invoice_date\": \"15-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CLEMENT HU Wenjing\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de la Sophrologie - TUI NA MASSAGE TRADITIONNEL CHINOIS\", \"quantity\": 1, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0efee497-131b-4961-a1dc-2810eedbb393_Facture_BALDACHINO.V_chiropracteur__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"750036287\", \"rpps_number\": \"10010376027\", \"finess_number\": null, \"doctor_name\": \"DECREMPS MARIA\", \"total_billed\": 100, \"bill_paid\": true, \"amount_paid\": 100, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BALDACHINO Vanessa\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET CHIROPRAXIE DECREMPS MARIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement Chiropractique\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 100}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/56f4bf93-8fe8-4273-a48a-4d2551892ec8_facture_osthero_2_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"927028928\", \"rpps_number\": \"10005777585\", \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9mence Garcia\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Benjamin Bouchet\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9mence Garcia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/323d5c7f-9ec7-42b0-9cad-d7cbd5f5c4d1_2025-02-06-simon-blondel_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"760004291\", \"rpps_number\": \"10010175924\", \"finess_number\": null, \"doctor_name\": \"Alice DEGUELLE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BLONDEL Simon\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Alice DEGUELLE El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a401f2d4-0ddd-4abb-ac2a-7d89f8926418_17388507245886317966749607885636_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010535861\", \"finess_number\": null, \"doctor_name\": \"M. ROPERS Corentin\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FREJAVILLE Jean-Charles\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"M. ROPERS Corentin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e3c36ee1-bf05-4ac7-b27f-c7cef109b796_Note_dhonoraires_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940008576\", \"rpps_number\": \"10010347333\", \"finess_number\": null, \"doctor_name\": \"Kim TORRE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Virginie Le Masson\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Kim TORRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/36c74c5c-7d74-4618-ae9b-405fb5b7609c_IMG_2091_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690018080\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00f4me LAURENT\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHAVANNE Emmanuel\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/afc2330e-295a-4fcf-b12f-eb7de776c7a7_EMERY_Jerome_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"600003321\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. FLORENT AUSSENAC\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EMERY Jerome\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"M. FLORENT AUSSENAC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/889ac4cf-a3cf-4c3d-9629-4511040883e8_F2500033_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Acupuncture\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SEBASTIEN POUSTHOMIS\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ana maria Damiao\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"M\\u00e9decine traditionnelle chinoise SEBASTIEN POUSTHOMIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acupuncture\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b1b5e4da-2462-48c1-a9b9-ff75ecd61e37_IMG_20250206_142210_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"240000513\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathieu MORINEAU\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Romuald TEXIER\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": \"175012432209055\", \"invoice_issuer\": \"M. Mathieu MORINEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie Adulte\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e37b1d1c-bf16-4bf3-9643-c0165eab5bce_17388480916126714440449278530087_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"317021699\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LIONEL GIUSTI\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CERVERO-SUSINI Catherine\", \"beneficiary_dob\": \"17-11-1967\", \"invoice_date\": \"05-02-2025\", \"security_number\": \"267118408705013\", \"invoice_issuer\": \"LIONEL GIUSTI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ed44a532-8aa9-4d47-9bbf-d10cd1dc4647_osteofev25seb_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"420005803\", \"rpps_number\": \"10010714383\", \"finess_number\": null, \"doctor_name\": \"Lisa TURCO\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"S\\u00e9bastien COUTURIER\", \"beneficiary_dob\": \"23-07-1984\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"E.i lisa TURCO (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6d5224ac-69a5-4658-b278-a7a03fef5cd8_facture_osteo_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"340005974\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Matthieu JUMEAUX ALONZO\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BIRAC Joann\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Matthieu JUMEAUX ALONZO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ec4849e0-81f0-4bb6-8faf-e11f09e1b73e_PICHAVANT_Eric_06022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"560006918\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE NORMAND Elisabeth\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PICHAVANT Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"LE NORMAND Elisabeth\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3e613099-4659-4efd-b33f-811fc5fc764d_IMG_20250206_131824_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Etiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Quentin GARCIA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GODEFROY Julien\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Quentin GARCIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation T.T.C.\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bcdab145-079b-4ba1-a334-97abfd45d9f8_Cabinet_Perreux_sur_marne_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940010507\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ELFASSI Ha\\u00efm\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KAZENAS Johanne\", \"beneficiary_dob\": \"27-10-1994\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"ELFASSI Ha\\u00efm\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/99d371ca-aa44-42da-9a68-37571e5f9aee_facture_osteopathe__01-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"330003476\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr Christophe ELLEAU\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nelly ROCHET\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Dr Christophe ELLEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6b4a0423-ddee-4096-9e41-13e7825c9618_facture_mutuelle_osteopathie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"130022056\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Am\\u00e9lie ANATON\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BENCHAMA Hakima\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Am\\u00e9lie ANATON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/920c8c7c-e3b7-445f-8fcc-4d89cc33a578_facture_osteo_du_04022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"390001501\", \"rpps_number\": \"10010433323\", \"finess_number\": null, \"doctor_name\": \"Lola PRAGER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"C\\u00e9dric Forest\", \"beneficiary_dob\": \"13-06-1982\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Lola PRAGER (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2a2e6719-2efc-4e92-9d56-138d6a5893be_facture_14_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010264488\", \"finess_number\": null, \"doctor_name\": \"Don Maite El\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sonia SERRANO\", \"beneficiary_dob\": \"11-02-1975\", \"invoice_date\": \"29-01-2025\", \"security_number\": \"275024406902753\", \"invoice_issuer\": \"LE LU Bleuenn Ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/36239e6f-f9cc-408c-b510-7638be6e0622_FACTURE_OSTHEO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"840003867\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ludovic FOUILLEUL\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Stephon Ronieux\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Ludovic FOUILLEUL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f02ef934-5964-4230-bfab-42bb97f20398_Scan_20250206_112050_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"940000110\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 90, \"bill_paid\": true, \"amount_paid\": 90, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NAPAL Mila\", \"beneficiary_dob\": \"05-08-1975\", \"invoice_date\": \"05-02-2025\", \"security_number\": \"20240017976668\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d60cea6d-1075-4e5d-9b52-92b5751c824f_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"380010942\", \"finess_number\": null, \"doctor_name\": \"Mathis TOURET\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Martin Pauline\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Mathis TOURET E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/40003ed8-4072-4398-bddd-0090359e602d_osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"670000298\", \"rpps_number\": \"10010494507\", \"finess_number\": null, \"doctor_name\": \"GENAY Sylvie\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HUET Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Sylvie GENAY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Un acte ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1a3dc013-5769-4e44-aa30-de4be4852e08_Seance_ostheo_TANCHOU_David_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"220004360\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUITTON Nicolas\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TANCHOU David\", \"beneficiary_dob\": \"24-08-1968\", \"invoice_date\": \"09-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie du Domaine Hillion\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie - Consultation du 09/01/2025\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dbecbc8b-6d12-4993-91fe-ee0e13de8911_seance_ostheo_Rose_Marie_THEUILLERE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220004360\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUITTON Nicolas\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"THEUILLERE Rose Marie\", \"beneficiary_dob\": \"26-05-1968\", \"invoice_date\": \"09-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie du Domaine Hillion\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie - Consultation du 09/01/2025\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d172cce4-7c6e-4954-93ff-ba8bba04e7db_2025-02-04_Facture_BAUDAIS_Ingrid_4_fevrier_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350007274\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BLOYET Servane\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BAUDAIS Ingrid\", \"beneficiary_dob\": \"27-03-1978\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BLOYET Servane\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5e5d3bd8-4494-4764-a001-fba806688587_Facture_ostheo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"820002095\", \"rpps_number\": \"10010646387\", \"finess_number\": null, \"doctor_name\": \"Viollet GABRIEL\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Hugo SOLIGNAC\", \"beneficiary_dob\": \"09-04-2000\", \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Viollet GABRIEL (El)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6229a44b-c626-4b50-a4e3-08560e4351c1_00206B452474250206101554-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770008712\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DEVOS Tracy\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jleriau Erwan\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"DEVOS Tracy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/815d334e-5da4-47f8-8ab5-2dd9365f9b63_Numerisation_20250206_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"820000107\", \"rpps_number\": \"10010604733\", \"finess_number\": null, \"doctor_name\": \"Bastien POLESELLO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Loubradou Jean Pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": \"159089935440675\", \"invoice_issuer\": \"Bastien POLESELLO - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": true, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9aea8ae5-7be5-4053-ac18-4135ec373137_IMG_5022_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690002001\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUILLAUME POTHIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BIGEARD CLEMENT\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"GUILLAUME POTHIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/662df7c6-dd5f-4209-86b5-c07061701aab_Scan_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"470000936\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Guillaume FOURNIER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"THENADEY Roger\", \"beneficiary_dob\": \"26-09-1939\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Guillaume FOURNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/81fa09d5-04c7-4ed5-8a19-e6e724a47acb_MORIN_MARYLINE_osteopathe_050225_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"770005098\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne LECONTE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MORIN Maryline\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie H\\u00e9l\\u00e8ne LECONTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s)\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3fd4b2e1-0247-47a1-8f27-ef7d95b9bcd2_Facture_M._Delattre_3_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"350008439\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Constance\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELATTRE Magda\", \"beneficiary_dob\": \"16-02-1971\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE BAGUE Constance\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9c09398e-1348-4844-8fce-e0421ad306c9_note-d-honoraire_-_2025-02-05T144527.016_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"780010732\", \"rpps_number\": \"10010352119\", \"finess_number\": null, \"doctor_name\": \"In\\u00e8s Robin-Pr\\u00e9vall\\u00e9e\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TALMON St\\u00e9ven\", \"beneficiary_dob\": \"11-01-1971\", \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"In\\u00e8s Robin-Pr\\u00e9vall\\u00e9e\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8fffd4ea-784b-49df-9f7e-780eed5664c8_facturation_osteo-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"690007729\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHOUDAR MOHAMED\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Decroze morgane\", \"beneficiary_dob\": null, \"invoice_date\": \"06-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CHOUDAR MOHAMED\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/84731b17-5743-4240-8aab-0f1bcc603925_chiropracteur_1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"560002743\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Launay Adrien D.C\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE BLANC Didier\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Launay Adrien D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f6db258d-f400-41b5-a5ca-7878010f0fd7_doc00945920250206091811_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"060006749\", \"rpps_number\": \"10010783149\", \"finess_number\": null, \"doctor_name\": \"Alexandra KORTULEWSKI\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BARATTE Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Alexandra KORTULEWSKI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e2bfe226-b525-4ebc-b88f-6ab167099bc6_FACTURE_OSTEOPATHE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"840005888\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PHILIPPI CL\\u00c9MENT\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Casale Didier\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"PHILIPPI CL\\u00c9MENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6bf53897-d7c7-4485-9393-4ebbaf952d30_2ostheoRecu_2025-02-06_092631_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"630000628\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUMOULIN Laureline\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHAUVET Delphine\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": \"285026311322257\", \"invoice_issuer\": \"DUMOULIN Laureline\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c69dded1-0e73-43af-a778-fc7cf8ea0598_SEANCE_OSTEOPATHIE_05_02_25_MONTIEGE_NATHALIE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"530000066\", \"rpps_number\": \"10101229226\", \"finess_number\": null, \"doctor_name\": \"Laurent GAISNON\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nathalie MONTIEGE\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Laurent GAISNON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cf942200-21da-4b0d-b26b-f40d64c3eda6_rager_vero_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"850003161\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Charles RAGER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAVON V\\u00e9ronique\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Jean-Charles RAGER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fc21bd20-da08-4e6a-9b42-7e11b57d9d22_Facture_osteopathe_Madame_Gregoire_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vincent Perret\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRECOINT St\\u00e9phane\", \"beneficiary_dob\": \"24-12-1978\", \"invoice_date\": \"29-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Vincent Perret\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/571b0712-b0bb-4211-8da4-d7b7e7ba8cc0_17388279193805260395571884052289_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"470001835\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emeline REY\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emilie MORIN\", \"beneficiary_dob\": null, \"invoice_date\": \"20-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Emeline REY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"20-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d381acdc-4936-42a4-bb36-a140c8e12d97_pirat_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Etiopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elodie PIRAT\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRONDIN Jean Francois\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Elodie PIRAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de Jean Francois GRONDIN du 29/01/2025\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3b2e0bc5-3f96-456f-9a21-1b6a5c2138b8_Osteopathe_Sophie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": null, \"rpps_number\": \"10010711744\", \"finess_number\": null, \"doctor_name\": \"Baptiste REY\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TORRE Sophie\", \"beneficiary_dob\": \"16-09-1972\", \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Baptiste REY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/44f39110-8f97-415d-8cc1-989f0a6df226_Ostheo_22_01_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"510000219\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"JEAN JAECK\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BEAUCHARPS Thomas\", \"beneficiary_dob\": null, \"invoice_date\": \"22-01-2025\", \"security_number\": null, \"invoice_issuer\": \"JEAN JAECK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"22-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/960a23fa-c110-4dfb-a0df-aa3a8f40314e_Facture_Osteo_Patricia_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"830015095\", \"rpps_number\": \"10010810504\", \"finess_number\": null, \"doctor_name\": \"Juliette RODRIGUEZ\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUGRAVOT Patricia\", \"beneficiary_dob\": null, \"invoice_date\": \"03-02-2025\", \"security_number\": \"2700952121088\", \"invoice_issuer\": \"Juliette RODRIGUEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b80394eb-a2ed-45a5-a77e-d5d76471f31c_IMG_6684_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"78000753\", \"rpps_number\": \"10010340908\", \"finess_number\": null, \"doctor_name\": \"Jeremie NGUYEN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAGRUE Marjorie\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE Jeremie NGUYEN EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1150ac6a-abdc-409d-b7ca-296e7b35769c_Facture_BOLZERJean_Francois_20250131GMAM_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": \"10010487618\", \"finess_number\": null, \"doctor_name\": \"Alexis CORTINA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M BOLZER Jean Fran\\u00e7ois\", \"beneficiary_dob\": \"30-08-1972\", \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de chiropraxie Cortina\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi Chiropratique\", \"quantity\": null, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27533786_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CRESPO PHILIPPE\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"super-VISION.fr\", \"currency\": null, \"items\": [{\"description\": \"Biofinity Toric MF +6 -1.75 170 +2.00D x6pk\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Biofinity Toric MF +10 -3.75 175 +2.00N x6pk\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27534156_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"750067217\", \"rpps_number\": null, \"finess_number\": \"142630292\", \"doctor_name\": \"\\u00c9ts CENTRE\", \"total_billed\": 62.2, \"bill_paid\": true, \"amount_paid\": 62.2, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"DUPUIS ANGELINE\", \"insured_dob\": \"14-10-1992\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"06-06-2023\", \"security_number\": \"292109923701414\", \"invoice_issuer\": \"OPTIC 2000\", \"currency\": \"EUR\", \"items\": [{\"description\": \"COOPERVISION - Biofinity (6) - 14/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 31.1}, {\"description\": \"COOPERVISION - Biofinity (6) - 14/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 31.1}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27535306_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": 920030616, \"rpps_number\": null, \"prescripteur_finess_number\": \"920030616\", \"finess_number\": \"952668648\", \"doctor_name\": null, \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 45.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SYLVESTRE Estelle\", \"beneficiary_dob\": \"02-07-1993\", \"invoice_date\": \"15-06-2023\", \"security_number\": \"293077511482549\", \"invoice_issuer\": \"L'OPTICIEN CONCEPT BEZONS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BAULER BIOTRUE ONEday 30L diam: 14.2, ray: 8.6, souple Journalier Rem 1 jour (par 30)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 45.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27535832_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132611062\", \"doctor_name\": null, \"total_billed\": 34.8, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 34.8, \"remaining_payment\": 34.8, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BECCA Angelina\", \"beneficiary_dob\": null, \"invoice_date\": \"19-06-2023\", \"security_number\": \"2 01 03 13 155 791\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique VITROLLES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles - AIR OPTIX plus HydraGlyde 3L Isozone Dur\\u00e9e de vie Mensuelle (PU:14.40\\u20ac)Souplesse Vision Confortable De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": \"\", \"mandatory_coverage\": \"\", \"amount\": 14.4}, {\"description\": \"Lentilles - AIR OPTIX plus HydraGlyde 3L Isozone Dur\\u00e9e de vie Mensuelle (PU:14.40\\u20ac)Souplesse Vision Confortable De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": \"\", \"mandatory_coverage\": \"\", \"amount\": 14.4}, {\"description\": \"Frais administratifs\", \"quantity\": 1, \"date_of_service\": \"\", \"mandatory_coverage\": \"\", \"amount\": 6.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27535886_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131753915\", \"rpps_number\": null, \"finess_number\": \"132658394\", \"doctor_name\": \"COLLET\", \"total_billed\": 90.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 90.0, \"remaining_payment\": 0, \"insured_name\": \"M. Nadine LONGO\", \"insured_dob\": \"01-01-1960\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"03-06-2023\", \"security_number\": \"267011305561234\", \"invoice_issuer\": \"Les Mirettes Of Marseille\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACCES530 LENTILLES OPHTALMIC L(OPH) +0,50 H* Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}, {\"description\": \"ACCES530 LENTILLES OPHTALMIC L(OPH) +2,50 H* Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27537843_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lolita DUMONTET\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"2850273066047\", \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27537844_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27537844_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 81.7, \"bill_paid\": true, \"amount_paid\": 81.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lolita DUMONTET\", \"beneficiary_dob\": null, \"invoice_date\": \"16-05-2023\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Toric (6) (toriques) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 33.9}, {\"description\": \"Biofinity Toric (6) (toriques) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 33.9}, {\"description\": \"everase plus 360ml\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.7}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27538221_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"771043312\", \"rpps_number\": \"10000703362\", \"finess_number\": \"772603775\", \"doctor_name\": \"MARTIN BEUZART Sylvie\", \"total_billed\": 184.0, \"bill_paid\": true, \"amount_paid\": 184.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"M. VERDEGEM FLORIAN\", \"insured_dob\": \"28-06-1995\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"16-06-2023\", \"security_number\": \"195065935530413\", \"invoice_issuer\": \"C.O OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson & Johnson - ACUVUE\\u00ae OASYS\\u00ae with HYDRACLEAR\\u00ae Plus vente par 12 - 14/8.8 - BOITE DE 12\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 92.0}, {\"description\": \"Johnson & Johnson - ACUVUE\\u00ae OASYS\\u00ae with HYDRACLEAR\\u00ae Plus vente par 12 - 14/8.8 - BOITE DE 12\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 92.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27544118_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"prescripteur_finess_number\": \"911002962\", \"finess_number\": \"752608000\", \"doctor_name\": \"BALERINI\", \"total_billed\": 180.0, \"bill_paid\": true, \"amount_paid\": 180.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 180.0, \"remaining_payment\": 0, \"insured_name\": \"JUDITH YANNICK\", \"insured_dob\": \"15-10-1976\", \"beneficiary_name\": \"JUDITH YANNICK\", \"beneficiary_dob\": \"15-10-1976\", \"invoice_date\": \"08-09-2022\", \"security_number\": \"176109740709784\", \"invoice_issuer\": \"SMS OPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BAUSCH LOMB BIOTRUE MULTIFOCAL/90 JETAB LES HORS TIPS\", \"quantity\": 1, \"date_of_service\": \"08-09-2022\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"BAUSCH LOMB BIOTRUE MULTIFOCAL/90 JETAB LES HORS TIPS\", \"quantity\": 1, \"date_of_service\": \"08-09-2022\", \"mandatory_coverage\": null, \"amount\": 90.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27545119_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27545119_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"311081806\", \"rpps_number\": null, \"finess_number\": \"312623275\", \"doctor_name\": \"CALIX Isabelle\", \"total_billed\": 280.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": 160.0, \"client_part\": 120.0, \"remaining_payment\": 0.0, \"insured_name\": \"DEWEZ BEATRICE\", \"insured_dob\": \"24-03-1973\", \"beneficiary_name\": \"DEWEZ BEATRICE\", \"beneficiary_dob\": \"24-03-1973\", \"invoice_date\": \"30-05-2023\", \"security_number\": \"273031120601861\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision - Easylens H2O+ Multifocal D (6) - 14/8.6 - BOITE DE 6\", \"quantity\": 4, \"date_of_service\": \"30-05-2023\", \"mandatory_coverage\": null, \"amount\": 248.0}, {\"description\": \"CooperVision - Easylens 1-Day Air+ Multifocal Low (30) - 14.1/8.6 - BOITE DE 30\", \"quantity\": 1, \"date_of_service\": \"30-05-2023\", \"mandatory_coverage\": null, \"amount\": 32.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27545715_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752609073\", \"doctor_name\": \"BESNAINOU\", \"prescripteur_finess_number\": \"751389693\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"EDOUARD ERIC\", \"insured_dob\": \"28-03-1968\", \"beneficiary_name\": \"EDOUARD ERIC\", \"beneficiary_dob\": \"28-03-1968\", \"invoice_date\": \"14-06-2023\", \"security_number\": \"168039721315515\", \"invoice_issuer\": \"LES OPTICIENS DU JURA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles\", \"quantity\": 1, \"amount\": 160.0}, {\"description\": \"LENTILLES JETABLES HORS TIPS\"}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27547656_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"591757075\", \"rpps_number\": null, \"finess_number\": \"592651897\", \"doctor_name\": \"SAUTIERE Bruno jean\", \"total_billed\": 24.9, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Benjamin GOULOIS\", \"insured_dob\": \"22-02-1987\", \"beneficiary_name\": \"Benjamin GOULOIS\", \"beneficiary_dob\": \"22-02-1987\", \"invoice_date\": \"20-06-2023\", \"security_number\": \"187026211907897\", \"invoice_issuer\": \"KRYS LILLE SOLFERINO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson&Johnson Vision Care - 1 r. C. Desmoulins-FR-92787 Issy-les-Moulineaux 6 x ACUVUE OASYS Lentille Sph\\u00e9ric Jetable Port Prolong\\u00e9 Conditionnement : 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 24.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27551187_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": \"131880171\", \"rpps_number\": \"10100375905\", \"finess_number\": \"132651761\", \"doctor_name\": \"Dr Gheorghe\", \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SEBASTIEN DOLIGNON\", \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2023\", \"security_number\": null, \"invoice_issuer\": \"POLE VISION DE LA CRAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact a adapter + produits d'entretien\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27552412_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GALLOIS Eug\\u00e9nie\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"282049527701393\", \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27552413_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"061128153\", \"rpps_number\": null, \"prescripteur_finess_number\": \"061128153\", \"finess_number\": \"062604046\", \"doctor_name\": \"KHLIF MEZIOU Hela\", \"total_billed\": 48.8, \"bill_paid\": true, \"amount_paid\": 48.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 48.8, \"remaining_payment\": 0.0, \"insured_name\": \"Eugenie GALLOIS\", \"insured_dob\": \"01-04-1982\", \"beneficiary_name\": \"Eugenie GALLOIS\", \"beneficiary_dob\": \"01-04-1982\", \"invoice_date\": \"13-05-2023\", \"security_number\": \"282049527701393\", \"invoice_issuer\": \"KRYS LE CANNET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon France-20 rue des deux gares-FR-92563 RUEIL MALMAISON GALLOIS EUGENIE A119 14,00 R 8,70 Lentille Souple Jetable Port Journalier\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 48.8}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27552835_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"942611518\", \"doctor_name\": null, \"total_billed\": 49.0, \"bill_paid\": true, \"amount_paid\": 49.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 49.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BERTOTTI Christian\", \"beneficiary_dob\": null, \"security_number\": \"1 50 07 75 068 003\", \"invoice_date\": \"16-06-2023\", \"invoice_issuer\": \"Generale d'Optique IVRY SUR SEINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - ACUVUE\\u00ae OASYS\\u00ae with HYDRACLEAR\\u00ae Plus vente par 6 Dur\\u00e9e de vie:Bi-mensuelle (PU:24,50\\u20ac)Stople:Johnson & Johnson:Boite De 6 Lentilles\", \"quantity\": 2, \"date_of_service\": \"16-06-2023\", \"mandatory_coverage\": null, \"amount\": 49.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27554761_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"131133076\", \"rpps_number\": \"10003352530\", \"finess_number\": null, \"doctor_name\": \"Docteur Yannick BARRAUD\", \"insured_name\": \"LOLO Antoinette\", \"invoice_issuer\": \"Docteur Yannick BARRAUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact multifocale\", \"date_of_service\": \"28-07-2022\"}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27554784_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751635566\", \"rpps_number\": null, \"prescripteur_finess_number\": null, \"finess_number\": \"932656440\", \"doctor_name\": \"BURTIN\", \"total_billed\": 21.0, \"bill_paid\": true, \"amount_paid\": 21.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 21.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yvonne LEBRIN\", \"beneficiary_dob\": \"22-05-1970\", \"invoice_date\": \"21-06-2023\", \"security_number\": \"27005940800229\", \"invoice_issuer\": \"OPTIQUE LEBOVIC SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon France - 20 Rue des Deux Gares - FR 92563 RUEIL MALMAISON 2 x FRESH LOOK COLORBLEND VERT AMAN Lentille Souple Fr\\u00e9quent Port Prolong\\u00e9\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 21.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27555315_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751635566\", \"rpps_number\": null, \"prescripteur_finess_number\": \"751635566\", \"finess_number\": \"932656440\", \"doctor_name\": \"BURTIN\", \"total_billed\": 21.0, \"bill_paid\": true, \"amount_paid\": 21.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yvonne LEBRIN\", \"beneficiary_dob\": \"22-05-1970\", \"invoice_date\": \"25-05-2023\", \"security_number\": \"27005940802229\", \"invoice_issuer\": \"OPTIQUE LEBOVIC SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon France -20 hydris deux paires- FR-225563 RUEIL MALMAISON 2 x FRESHLOOK COLORBLEND VERT AMAN Lentilles Souples Fr\\u00e9quence Port Prolong\\u00e9e\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 21.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27557952_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132663873\", \"prescripteur_finess_number\": \"130051642\", \"doctor_name\": \"LAMDAOUI\", \"total_billed\": 90.0, \"bill_paid\": true, \"amount_paid\": 90.0, \"complementary_coverage\": null, \"client_part\": 90.0, \"insured_name\": \"Soummani Ghassan\", \"insured_dob\": \"02-10-2006\", \"beneficiary_name\": \"Soummani Ghassan\", \"beneficiary_dob\": \"02-10-2006\", \"invoice_date\": \"17-06-2023\", \"security_number\": \"2831299350855\", \"invoice_issuer\": \"FRED OPTIQUE AIX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"J&J ONE DAY ACUVUE MOIST BTE DE 90\", \"quantity\": 1, \"amount\": 45.0}, {\"description\": \"J&J ONE DAY ACUVUE MOIST BTE DE 90\", \"quantity\": 1, \"amount\": 45.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27557954_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"130051642\", \"rpps_number\": null, \"prescripteur_finess_number\": \"130051642\", \"finess_number\": \"132663873\", \"doctor_name\": \"LAMDAOUI\", \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 45.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VERGARA Madiha\", \"beneficiary_dob\": \"08-12-1983\", \"invoice_date\": \"17-06-2023\", \"security_number\": \"2831299350855\", \"invoice_issuer\": \"FRED OPTIQUE AIX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"J&J ONE DAY ACUVUE MOIST BTE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 22.5}, {\"description\": \"J&J ONE DAY ACUVUE MOIST BTE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 22.5}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27559288_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27559288_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10001554749\", \"finess_number\": \"932656440\", \"prescripteur_finess_number\": \"750000549\", \"doctor_name\": \"Dr. DENIA CHERRAZ\", \"total_billed\": 607.6, \"bill_paid\": true, \"amount_paid\": 560.22, \"mandatory_coverage\": 47.38, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dorina GAZE\", \"beneficiary_dob\": \"08-02-1999\", \"invoice_date\": \"20-06-2023\", \"security_number\": \"293029741505041\", \"invoice_issuer\": \"OPTIQUE LEBOVIC SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Menicon Europe-1.5 r de la P\\u00e9relle - FR-93420 Villepinte 1 x Rose K2 Mat\\u00e9riau EX lentille rigide perm\\u00e9able - renouvellement annuel - port diurne - mat\\u00e9riau EX - flacon de 1 lentille Quantit\\u00e9 : 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 303.8}, {\"description\": \"Menicon Europe-1.1 r de la P\\u00e9relle - FR-93420 Villepinte 1 x Rose K2 Mat\\u00e9riau EX lentille rigide perm\\u00e9able - renouvellement annuel - port diurne - mat\\u00e9riau EX - flacon de 1 lentille Quantit\\u00e9 : 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 303.8}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27559421_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752613927\", \"doctor_name\": null, \"total_billed\": 139.2, \"bill_paid\": true, \"amount_paid\": 139.2, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIROD Olivier\", \"beneficiary_dob\": null, \"invoice_date\": \"20-06-2023\", \"security_number\": \"1750775109067\", \"invoice_issuer\": \"GrandOptical HAUSSMANN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - ACUVUE\\u00ae OASYS\\u00ae 1-Day with HydraLuxe\\u2122 Vente par 90 Dur\\u00e9e de vie Journali\\u00e8re (PU:69.60\\u20ac)Souple/Johnson & Johnson/Boite De 90 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.6}, {\"description\": \"Lentille G - ACUVUE\\u00ae OASYS\\u00ae 1-Day with HydraLuxe\\u2122 Vente par 90 Dur\\u00e9e de vie Journali\\u00e8re (PU:69.60\\u20ac)Souple/Johnson & Johnson/Boite De 90 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.6}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27559561_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10004821939\", \"finess_number\": \"592672596\", \"doctor_name\": \"CHEVALIER PHILIPPE\", \"total_billed\": 179.8, \"bill_paid\": true, \"amount_paid\": 179.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yanick TOLIKOLI\", \"beneficiary_dob\": \"10-04-1991\", \"invoice_date\": \"03-06-2023\", \"security_number\": \"191048306103384\", \"invoice_issuer\": \"Le Collectif des Lunetiers Roubaix - Motte\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson&Johnson Vision Care- 1r, C. Desmoulins- FR-92787 Issy-les-Moulineaux 90+1 DAY ACQASYS 90 (H3) Lentille Souple Jetable Port Journalier; Conditionnement: 90 Quantit\\u00e9: 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.9}, {\"description\": \"Johnson&Johnson Vision Care- 1r, C. Desmoulins- FR-92787 Issy-les-Moulineaux 90+1 DAY ACQASYS (H3) Lentille Souple Jetable Port Journalier; Conditionnement: 90 Quantit\\u00e9: 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 99.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27559701_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27559701_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 126.65, \"bill_paid\": true, \"amount_paid\": 126.65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vianney et Lucie BERT\", \"beneficiary_dob\": null, \"invoice_date\": \"14-06-2023\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.08}, {\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.08}, {\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.08}, {\"description\": \"ACUVUE Oasys (12) (Lentilles sph\\u00e9riques) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.08}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.0}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -6.67}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27560309_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131195984\", \"rpps_number\": null, \"prescripteur_finess_number\": \"131195984\", \"finess_number\": \"132665456\", \"doctor_name\": \"OUADAH Saad\", \"total_billed\": 180.0, \"bill_paid\": true, \"amount_paid\": 180.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean-Marc BENZI\", \"beneficiary_dob\": \"18-08-1969\", \"invoice_date\": \"04-05-2023\", \"security_number\": \"169081305512404\", \"invoice_issuer\": \"SASU AM OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Coopervision France-1800, riades Crtres-FR-06560 Sophia Antipolis 90 x PROCLEAR 1 DAY 90 Lentille Souple Jetable Port Journalier-Conditionnement : 90 Quantit\\u00e9 : 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Coopervision France-1800, riades Crtres-FR-06560 Sophia Antipolis 90 x PROCLEAR 1 DAY 90 Lentille Souple Jetable Port Journalier-Conditionnement : 90 Quantit\\u00e9 : 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27561402_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131170425\", \"rpps_number\": null, \"prescripteur_finess_number\": \"131170425\", \"finess_number\": \"132656604\", \"doctor_name\": \"REYMONDET Jean-marc\", \"total_billed\": 108.0, \"bill_paid\": true, \"amount_paid\": 108.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chantal MILLION-ROUSSEAU\", \"beneficiary_dob\": \"28-12-1957\", \"invoice_date\": \"20-06-2023\", \"security_number\": \"257120103433093\", \"invoice_issuer\": \"Le Collectif des Lunetiers Aix en Provence\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Procilens Laboratoires -25, rue Auguste Perret- FR-94044 CRETEIL C2XTRA HIGH BOITE Lentille Souple avec une perm\\u00e9abilit\\u00e9 oxyg\\u00e8ne de 18 et une transmissibilit\\u00e9 oxyg\\u00e8ne de 86 \\u00e0 renouveler de mani\\u00e8re trimestrielle (3 mois). Traitement proclear (d\\u00e9silicone (SILICONE), \\u00e9tanch\\u00e9it\\u00e9 par boite) Quantit\\u00e9: 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 108.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/00ac2a76-52ec-437a-8477-a573d62ecbe4_Livret_de_Famille_2024_1-3_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Gaz-recto-633x1024_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/actaadfdfffe_mariage_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Magasinier-Exemple-de-CV_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/modele quittance de loyers agricoles_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27562736_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"351727805\", \"rpps_number\": \"10100951119\", \"finess_number\": \"352612444\", \"doctor_name\": \"Dr BOUREAUX Elodie\", \"total_billed\": 107.76, \"bill_paid\": true, \"amount_paid\": 107.76, \"insured_name\": \"M. GAILLARD ARNAUD\", \"insured_dob\": \"15-02-1988\", \"beneficiary_name\": \"M. GAILLARD ARNAUD\", \"beneficiary_dob\": \"15-02-1988\", \"invoice_date\": \"26-06-2023\", \"security_number\": \"188027218121710\", \"invoice_issuer\": \"OPTIQUE E. LECLERC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson & Johnson - ACUVUE OASYS FOR AST 6L - 14.5/8.6 - BOITE DE 6\", \"quantity\": 2, \"date_of_service\": \"26-06-2023\", \"amount\": 53.88}, {\"description\": \"Johnson & Johnson - ACUVUE OASYS FOR AST 6L - 14.5/8.6 - BOITE DE 6\", \"quantity\": 2, \"date_of_service\": \"26-06-2023\", \"amount\": 53.88}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27565028_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752604603\", \"prescripteur_finess_number\": \"931701882\", \"doctor_name\": \"BENHAMOU\", \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"complementary_coverage\": null, \"client_part\": 30.0, \"insured_name\": \"BOUSELHAM GUILLAUME\", \"insured_dob\": \"08-04-1988\", \"beneficiary_name\": \"BOUSELHAM GUILLAUME\", \"beneficiary_dob\": \"08-04-1988\", \"invoice_date\": \"17-05-2023\", \"security_number\": \"188047510903176\", \"invoice_issuer\": \"SAS FIT OPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LENTILLES\", \"amount\": 30.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27570312_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"591153929\", \"rpps_number\": null, \"finess_number\": \"592636534\", \"prescripteur_finess_number\": \"591153929\", \"doctor_name\": \"LEFAKOUONG PASCAL\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 70.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LARUELLE Lorenzo\", \"beneficiary_dob\": \"23-03-1999\", \"invoice_date\": \"20-06-2023\", \"security_number\": \"199035932930860\", \"invoice_issuer\": \"OPTI'SOINS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity (6)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 35.0}, {\"description\": \"Biofinity (6)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 35.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27571476_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 78, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 78, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIVACE AND Naux- Courence\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"implant intra-oculaire AcrySof\\u00ae Toric\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 263.75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27572543_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27572543_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752609073\", \"prescripteur_finess_number\": \"751389693\", \"doctor_name\": \"BESNAINOU\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"EDOUARD ERIC\", \"insured_dob\": \"28-03-1968\", \"beneficiary_name\": \"EDOUARD ERIC\", \"beneficiary_dob\": \"28-03-1968\", \"invoice_date\": \"14-06-2023\", \"security_number\": \"168039721315515\", \"invoice_issuer\": \"LES OPTICIENS DU JURA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOFLENS 59 \\u00d814.2 (8.6) BOITE D E 6\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 160.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27573869_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"251033429\", \"rpps_number\": \"10107908807\", \"finess_number\": \"252621511\", \"doctor_name\": \"Dr LOUNGI Marina\", \"total_billed\": 86.0, \"bill_paid\": true, \"amount_paid\": 86.0, \"insured_name\": \"MME BARRAULT MARINE\", \"insured_dob\": \"13-09-1999\", \"beneficiary_name\": \"MME BARRAULT MARINE\", \"beneficiary_dob\": \"13-09-1999\", \"invoice_date\": \"29-06-2023\", \"security_number\": \"2 99 09 25 388 298 40\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Menicon VISAUDIO - Indivisual - 14.2/8.6 - INDIVIDUELLE\", \"quantity\": 1, \"amount\": 43.0}, {\"description\": \"Menicon VISAUDIO - Indivisual - 14.2/8.6 - INDIVIDUELLE\", \"quantity\": 1, \"amount\": 43.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27575418_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"312620446\", \"prescripteur_finess_number\": \"751714668\", \"doctor_name\": \"CATHERINE IRLAT\", \"total_billed\": 101.0, \"bill_paid\": true, \"amount_paid\": 101.0, \"complementary_coverage\": null, \"client_part\": 101.0, \"remaining_payment\": 0.0, \"insured_name\": \"TURLAN Cecile\", \"insured_dob\": \"04-01-1995\", \"beneficiary_name\": \"TURLAN Cecile\", \"beneficiary_dob\": \"04-01-1995\", \"invoice_date\": \"20-06-2023\", \"security_number\": \"295013155531758\", \"invoice_issuer\": \"CONCORDE OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BIOFINITY TORIC \\u00d814,5 (8,7)\", \"quantity\": 1, \"amount\": 61.0}, {\"description\": \"BIOFINITY \\u00d814,0 (8,6)\", \"quantity\": 1, \"amount\": 40.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27575667_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"370000861\", \"rpps_number\": null, \"finess_number\": \"372636662\", \"doctor_name\": \"\\u00c9ts POLE TETE ET COU\", \"total_billed\": 78.0, \"bill_paid\": true, \"amount_paid\": 0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 78.0, \"client_part\": 0.0, \"remaining_payment\": 0.0, \"insured_name\": \"M. BARBIER MAXIME\", \"insured_dob\": \"21-04-1984\", \"beneficiary_name\": \"M. BARBIER MAXIME\", \"beneficiary_dob\": \"21-04-1984\", \"invoice_date\": \"26-01-2022\", \"security_number\": \"184043705016326\", \"invoice_issuer\": \"ECOUTER VOIR - Tours Centre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision VISAUDIO - BIOFINITY XR86 - 14/8,6 - BOITE DE 6\", \"quantity\": 1, \"amount\": 39.0}, {\"description\": \"CooperVision VISAUDIO - BIOFINITY XR86 - 14/8,6 - BOITE DE 6\", \"quantity\": 1, \"amount\": 39.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27575670_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 78.0, \"bill_paid\": true, \"amount_paid\": 0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 78.0, \"client_part\": 0.0, \"remaining_payment\": 0.0, \"insured_name\": \"BARBIER MAXIME\", \"insured_dob\": \"21-04-1984\", \"beneficiary_name\": \"BARBIER MAXIME\", \"beneficiary_dob\": \"21-04-1984\", \"invoice_date\": \"26-01-2022\", \"security_number\": null, \"invoice_issuer\": \"MUTUALITE FRANCAISE CENTRE- VAL DE LOIRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille Droite\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 39.0}, {\"description\": \"Lentille Gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 39.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27576127_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"972640239\", \"doctor_name\": null, \"total_billed\": 53.0, \"bill_paid\": true, \"amount_paid\": 53.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 48.86, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANCUSO Mireille\", \"beneficiary_dob\": null, \"invoice_date\": \"24-06-2023\", \"security_number\": null, \"invoice_issuer\": \"CHEVILLARD SAINTE CLOTILDE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BNLFR SofLens\\u00ae Multifocal High (bo\\u00eete de 6), diam : 14.5, ray : 8.8, souple, journalier, Ren: Mensuel (par 6)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 53}, {\"description\": \"BNLFR SofLens\\u00ae Multifocal High (bo\\u00eete de 6), diam : 14.5, ray : 8.8, souple, journalier, Ren: Mensuel (par 6)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 53}, {\"description\": \"Facture accompte\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -53}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27578640_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"541004537\", \"rpps_number\": \"10000780519\", \"finess_number\": \"132664632\", \"doctor_name\": \"Dr KUNTER Florence\", \"total_billed\": 56.25, \"bill_paid\": true, \"amount_paid\": 56.25, \"insured_name\": \"M. MALLOUK FAYCEL\", \"insured_dob\": \"19-09-1985\", \"beneficiary_name\": \"M. MALLOUK FAYCEL\", \"beneficiary_dob\": \"19-09-1985\", \"invoice_date\": \"03-04-2023\", \"security_number\": \"185095432906556\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson & Johnson Medical SAS - ACUVUE OASYS for ASTIGMATISM 6L - 14.5/8.6 - BOITE DE 6\", \"quantity\": 1, \"amount\": 32.25}, {\"description\": \"Johnson & Johnson Medical SAS - ACUVUE OASYS with HYDRACLEAR\\u00ae Plus 6L - 14/8.4 - BOITE DE 6\", \"quantity\": 1, \"amount\": 24.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27579295_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132656216\", \"prescripteur_finess_number\": \"131159840\", \"doctor_name\": \"GHARBI BADRA\", \"total_billed\": 480.0, \"bill_paid\": true, \"amount_paid\": 480.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"BENCHENNI LAHOUARIA\", \"insured_dob\": \"12-01-1977\", \"beneficiary_name\": \"BENCHENNI LAHOUARIA\", \"beneficiary_dob\": \"12-01-1977\", \"invoice_date\": \"11-05-2023\", \"security_number\": \"2 77 01 69 384 090 33\", \"invoice_issuer\": \"A PLEINE VUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 LENTILLES 1 DAY ACUVUE MOIST MULTIF MED 30L \\u00d814.3 (8.4) Boite de 90L\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 240.0}, {\"description\": \"2 LENTILLES 1 DAY ACUVUE MOIST MULTIF MED 30L \\u00d814.3 (8.4) Boite de 90L\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 240.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27579724_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751635566\", \"rpps_number\": null, \"prescripteur_finess_number\": \"751635566\", \"finess_number\": \"932656440\", \"doctor_name\": \"BURTIN\", \"total_billed\": 21.0, \"bill_paid\": true, \"amount_paid\": 21.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yvonne LEBRIN\", \"beneficiary_dob\": \"22-05-1970\", \"invoice_date\": \"25-05-2023\", \"security_number\": \"270059408002229\", \"invoice_issuer\": \"OPTIQUE LEBOVIC SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon France -20 rue des deux parcs- FR-92563 RUEIL MALMAISON 2 x FRESHLOOK COLORBLEND VERT AVAN Lentille Souple Frequent-Port Prolong\\u00e9 Conditionnement : 2 Quantit\\u00e9: 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 21.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27579725_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751635566\", \"rpps_number\": null, \"prescripteur_finess_number\": \"751635566\", \"finess_number\": \"930565440\", \"doctor_name\": \"BURTIN\", \"total_billed\": 21.0, \"bill_paid\": true, \"amount_paid\": 21.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 21.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yvonne LEBRIN\", \"beneficiary_dob\": \"22-05-1970\", \"invoice_date\": \"25-05-2023\", \"security_number\": \"270059408900229\", \"invoice_issuer\": \"OPTIQUE LEBOVIC SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon France -20 rue des deux gares- FR-92563 RUEIL MALMAISON 2 x FRESHLOOK COLORBLEND VERT AXMAN Lentille Souple: Frequent, Port Prolong\\u00e9\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 21.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27580414_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"781713573\", \"rpps_number\": null, \"prescripteur_finess_number\": \"781713573\", \"finess_number\": \"782666648\", \"doctor_name\": \"FALAH SABRINA\", \"total_billed\": 40.32, \"bill_paid\": true, \"amount_paid\": 40.32, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie Dorlys MOUROUVIN\", \"beneficiary_dob\": \"13-10-1982\", \"invoice_date\": \"03-06-2023\", \"security_number\": \"282109741004788\", \"invoice_issuer\": \"KRYS VELIZY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon France -20 rue des deux parcs- FR-92563 RUPIL MAI MAISON 2 x FRESH LOOK COLORBLEND GRIS PERLE Lentille Souple\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.16}, {\"description\": \"Alcon France -20 rue des deux parcs- FR-92563 RUPIL MAI MAISON 2 x FRESH LOOK COLORBLEND CARAMEL Lentille Souple\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.16}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27580555_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"211043955\", \"rpps_number\": null, \"finess_number\": \"212696942\", \"doctor_name\": \"MATTHIEU SOULIE-STROUGAR\", \"total_billed\": 42.4, \"bill_paid\": true, \"amount_paid\": 42.4, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ALDAYA Marl\\u00e8ne\", \"beneficiary_dob\": null, \"invoice_date\": \"06-05-2023\", \"security_number\": \"2920721054258\", \"invoice_issuer\": \"GrandOptical DIJON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - BIOFINITY TORIC Dur\\u00e9e de vie:Mensuelle (PU:21.20\\u20ac)SuppleCopaysvisionBoite De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": \"\", \"mandatory_coverage\": \"\", \"amount\": 21.2}, {\"description\": \"Lentille G - BIOFINITY TORIC Dur\\u00e9e de vie:Mensuelle (PU:21.20\\u20ac)SuppleCopaysvisionBoite De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": \"\", \"mandatory_coverage\": \"\", \"amount\": 21.2}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27586729_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"062602107\", \"doctor_name\": null, \"total_billed\": 190.0, \"bill_paid\": true, \"amount_paid\": 190.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ESPINASSIER Olga\", \"beneficiary_dob\": null, \"security_number\": \"2 44 12 99 63 48 22\", \"invoice_date\": \"13-06-2023\", \"invoice_issuer\": \"Laboratoire PROCONTACT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact \\u00e0 renouvellement fr\\u00e9quent\", \"amount\": 190.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27588122_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"141099838\", \"rpps_number\": null, \"finess_number\": \"342660149\", \"doctor_name\": \"GENERIQUE MEDECIN\", \"total_billed\": 23.6, \"bill_paid\": true, \"amount_paid\": 23.6, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DACQUIN Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"07-12-2022\", \"security_number\": \"2810478646272\", \"invoice_issuer\": \"Grand Optical MONTPELLIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - EYEXPERT SILK - Bo\\u00eete de 3 lentilles Dur\\u00e9e de vie:Mensuelle (PU:11,80\\u20ac)SurpleCopervisionBo\\u00eete De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": \"\", \"mandatory_coverage\": \"\", \"amount\": 11.8}, {\"description\": \"Lentille G - EYEXPERT SILK - Bo\\u00eete de 3 lentilles Dur\\u00e9e de vie:Mensuelle (PU:11,80\\u20ac)SurpleCopervisionBo\\u00eete De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": \"\", \"mandatory_coverage\": \"\", \"amount\": 11.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27588377_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"942614223\", \"prescripteur_finess_number\": \"940026214\", \"doctor_name\": \"CENTRE OPHT Choisy\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": 80.0, \"insured_name\": \"ROGER Lucas\", \"insured_dob\": \"09-07-1996\", \"beneficiary_name\": \"ROGER Lucas\", \"beneficiary_dob\": \"09-07-1996\", \"invoice_date\": \"04-07-2023\", \"security_number\": \"196079531323645\", \"invoice_issuer\": \"optikel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"DAILIES AquaComfort Plus Toric 30L \\u00d814.4 (8.8)\", \"quantity\": 1, \"amount\": 40.0}, {\"description\": \"DAILIES AquaComfort Plus Toric 30L \\u00d814.4 (8.8)\", \"quantity\": 1, \"amount\": 40.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27588485_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"942614223\", \"prescripteur_finess_number\": \"771716131\", \"total_billed\": 600.0, \"bill_paid\": true, \"amount_paid\": 599.91, \"mandatory_coverage\": 0.09, \"complementary_coverage\": null, \"client_part\": 599.91, \"remaining_payment\": 0.0, \"insured_name\": \"CAMPENON Fabrice\", \"insured_dob\": \"07-07-1970\", \"beneficiary_name\": \"CAMPENON Fabrice\", \"beneficiary_dob\": \"07-07-1970\", \"invoice_date\": \"27-06-23\", \"security_number\": \"170075819404367\", \"invoice_issuer\": \"Optikel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"DE RI - POLICE - VPLB64 - 570627 - 0/0 (1)\", \"quantity\": 1, \"amount\": 200.0}, {\"description\": \"HEP - Innovator Basic 150 Ultra - 70/75\", \"quantity\": 2, \"amount\": 400.0}], \"doctor_name\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27590035_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751571787\", \"rpps_number\": null, \"finess_number\": \"752610626\", \"doctor_name\": \"Mr LAHIANY SAMY\", \"total_billed\": 204.0, \"bill_paid\": true, \"amount_paid\": 204.0, \"mandatory_coverage\": 0, \"complementary_coverage\": 0, \"client_part\": 204, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRIFFARD Gael\", \"beneficiary_dob\": \"04-09-1987\", \"invoice_date\": \"29-06-2023\", \"security_number\": \"187099710106313\", \"invoice_issuer\": \"Optique M\\u00e9dical Mouzaia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"lentilles hr boite 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 102.0}, {\"description\": \"lentilles hr boite 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 102.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27591901_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 35.8, \"bill_paid\": true, \"amount_paid\": 35.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Sabrina BECKER\", \"beneficiary_dob\": null, \"invoice_date\": \"02-07-2023\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SofLens daily disposable (90) (Sph\\u00e9rique) Oeil droit Sph\\u00e8re (PWR) : -3,00 (myope) Rayon (BC) : 8,60 Diam\\u00e8tre (DIA) : 14,20\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 2.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27601381_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"972612022\", \"doctor_name\": null, \"total_billed\": 32.0, \"bill_paid\": true, \"amount_paid\": 32.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"MARTEL Alexis\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"07-07-2023\", \"security_number\": \"1871102408065\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique BASSE TERRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - iWear Hydro (6) Dur\\u00e9e de vie Mensuelle (PU:16.00\\u20ac)Souple/Coopervision/Boite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 16.0}, {\"description\": \"Lentille G - iWear Hydro (6) Dur\\u00e9e de vie Mensuelle (PU:16.00\\u20ac)Souple/Coopervision/Boite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 16.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27601438_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752605618\", \"doctor_name\": null, \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. REYREYNIER FRANCOISE\", \"beneficiary_dob\": null, \"invoice_date\": \"08-07-2023\", \"security_number\": \"261068403141785\", \"invoice_issuer\": \"Fac Optique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LENTILLE D : Novacel Air Optix Plus Hydraglyde Multifocale High 6L\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"LENTILLE G : Novacel Air Optix Plus Hydraglyde Multifocale High 6L\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27601451_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"941701039\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BARALE PIERRE-OLIVIER\", \"total_billed\": 141.16, \"bill_paid\": true, \"amount_paid\": 79.13, \"mandatory_coverage\": 62.03, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"GUILLON SERGE\", \"insured_dob\": \"05-07-1950\", \"beneficiary_name\": \"GUILLON SERGE\", \"beneficiary_dob\": \"05-07-1950\", \"invoice_date\": \"30-06-2023\", \"security_number\": \"150078713300793\", \"invoice_issuer\": \"Dr PIERRE OLIVIER BARALE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BZQK001 Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"30-06-2023\", \"mandatory_coverage\": 47.88, \"amount\": 120.0}, {\"description\": \"BGQP002 Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"30-06-2023\", \"mandatory_coverage\": 14.15, \"amount\": 21.16}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27601539_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"572620664\", \"prescripteur_finess_number\": \"571038835\", \"doctor_name\": \"SIMATTI Eric\", \"total_billed\": 94.0, \"bill_paid\": true, \"amount_paid\": 46.62, \"mandatory_coverage\": 47.38, \"complementary_coverage\": null, \"client_part\": 46.62, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VALLEE Benedicte\", \"invoice_date\": \"05-07-23\", \"security_number\": \"282115763106961\", \"invoice_issuer\": \"MAGNY OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Toric 6L \\u00d814.5 (8.7)\", \"quantity\": 1, \"amount\": 47.0}, {\"description\": \"Biofinity Toric 6L \\u00d814.5 (8.7)\", \"quantity\": 1, \"amount\": 47.0}], \"beneficiary_dob\": \"18-11-1992\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27603575_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"922618426\", \"doctor_name\": null, \"total_billed\": 448.0, \"bill_paid\": true, \"amount_paid\": 448.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MELOT Stephane Louis\", \"beneficiary_dob\": null, \"invoice_date\": \"05-07-2023\", \"security_number\": null, \"invoice_issuer\": \"OPTIQUE BONOT LGC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ALC Dailies Total 1 Multifocal High 90L, diam : 14.1, ray : 8.5, souple, journalier, Ren: 1 jour (par 90)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 224.0}, {\"description\": \"ALC Dailies Total 1 Multifocal High 90L, diam : 14.1, ray : 8.5, souple, journalier, Ren: 1 jour (par 90)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 224.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27603799_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27603799_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"922614250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 73.7, \"bill_paid\": true, \"amount_paid\": 73.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Benoit Bouyer\", \"beneficiary_dob\": null, \"invoice_date\": \"07-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Krys.com\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact Biofinity toric Bo\\u00eete de 6 Coopervision Osi\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 33.9}, {\"description\": \"Lentilles de contact Biofinity toric Bo\\u00eete de 6 Coopervision Osi\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 33.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27606001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"922614250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 41.8, \"bill_paid\": true, \"amount_paid\": 41.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Madame Cheherazed SIDI-YACOUB\", \"beneficiary_dob\": null, \"invoice_date\": \"13-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Krys\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact Menicon Premio Bo\\u00eete de 6 Menicon Europe\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.9}, {\"description\": \"Lentilles de contact Menicon Premio Bo\\u00eete de 6 Menicon Europe\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27607868_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"342691821\", \"prescripteur_finess_number\": \"341059442\", \"doctor_name\": \"JEAN CHARLES CLAUDE\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BRUEL ROGER\", \"beneficiary_dob\": null, \"invoice_date\": \"04-07-2023\", \"security_number\": null, \"invoice_issuer\": \"L OPTICIENNE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPHTALMIC OSV 150 PCC2 Org 70\", \"quantity\": 1, \"amount\": 80.0}, {\"description\": \"OPHTALMIC OSV 150 PCC2 Org 70\", \"quantity\": 1, \"amount\": 80.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27611284_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"842607020\", \"prescripteur_finess_number\": \"841702913\", \"doctor_name\": \"FOURMENT Pierre\", \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 30.0, \"remaining_payment\": 0.0, \"insured_name\": \"ACCOSSANO Pauline\", \"insured_dob\": \"19-07-1981\", \"beneficiary_name\": \"ACCOSSANO Pauline\", \"beneficiary_dob\": \"19-07-1981\", \"invoice_date\": \"05-07-2023\", \"security_number\": \"281071302803654\", \"invoice_issuer\": \"ALAINAFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Eph\\u00e9m\\u00e8re Confort Silicone Hydrogel 30L \\u00d814.0 (8.6)\", \"quantity\": 1, \"date_of_service\": \"05-07-2023\", \"mandatory_coverage\": null, \"amount\": 15.0}, {\"description\": \"Eph\\u00e9m\\u00e8re Confort Silicone Hydrogel 30L \\u00d814.0 (8.6)\", \"quantity\": 1, \"date_of_service\": \"05-07-2023\", \"mandatory_coverage\": null, \"amount\": 15.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27613310_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132866967\", \"prescripteur_finess_number\": \":130052939\", \"doctor_name\": \"CENTRE OPHTALMOLOGlQUE\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 60.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SARKISSIAN Edgar\", \"beneficiary_dob\": \"06-05-1981\", \"invoice_date\": \"12-07-2023\", \"security_number\": \"1 81 05 99 252 001 86\", \"invoice_issuer\": \"Amiel Optique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille OPH HR 1DAY PROG LOW 30L (OPH) - 30 (OPHTALMIO) (Bte de 30)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 60.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27617919_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"102600053\", \"doctor_name\": null, \"total_billed\": 59.0, \"bill_paid\": true, \"amount_paid\": 59.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"CARDOSO Jessica\", \"insured_dob\": null, \"beneficiary_name\": \"CARDOSO Jessica\", \"beneficiary_dob\": null, \"invoice_date\": \"13-05-2023\", \"security_number\": \"2 87 11 95 203 030\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique SAINT ANDRE LES VERGERS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - SOFLENS DAILY DISPOSABLE 90 Dur\\u00e9e de vie:Journali\\u00e8re (PU:59.00\\u20ac)Sph\\u00e8reBausch & LombBo\\u00eete De 90 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 59.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27618314_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 77.55, \"bill_paid\": true, \"amount_paid\": 77.55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jennifer Fernandes\", \"beneficiary_dob\": null, \"invoice_date\": \"01-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Lensvision.fr\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acuvue Oasys 1-Day - 30 lentilles journali\\u00e8res - Sph -6 5 BC 8 5 D 14 3\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.75}, {\"description\": \"ACUVUE OASYS 1-Day with HydraLuxe - 90 lentilles journali\\u00e8res - Sph -6 5 BC 8 5 D 14 3\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 56.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27618927_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27618927_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"571037464\", \"rpps_number\": \"10002376316\", \"finess_number\": \"572627479\", \"doctor_name\": \"DR GASS Michel\", \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"M. POCH GREGORY\", \"insured_dob\": \"07-03-1978\", \"beneficiary_name\": \"M. POCH GREGORY\", \"beneficiary_dob\": \"07-03-1978\", \"invoice_date\": \"15-07-2023\", \"security_number\": \"178035767205530\", \"invoice_issuer\": \"ALAINAFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon - L'Eph\\u00e9m\\u00e8re Confort + 30L - 14/8.7 - BOITE DE 30\", \"quantity\": 1, \"date_of_service\": \"15-07-2023\", \"mandatory_coverage\": null, \"amount\": 15.0}, {\"description\": \"Alcon - L'Eph\\u00e9m\\u00e8re Confort + 30L - 14/8.7 - BOITE DE 30\", \"quantity\": 1, \"date_of_service\": \"15-07-2023\", \"mandatory_coverage\": null, \"amount\": 15.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27619261_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"951701994\", \"rpps_number\": \"10001325991\", \"finess_number\": \"952667731\", \"doctor_name\": \"Dr LEROY-TESTAS Fabienne\", \"total_billed\": 192.0, \"bill_paid\": true, \"amount_paid\": 100.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 100.0, \"remaining_payment\": 0.0, \"insured_name\": \"ARTAN GUZADE\", \"insured_dob\": \"11-03-1978\", \"beneficiary_name\": \"ARTAN GUZADE\", \"beneficiary_dob\": \"11-03-1978\", \"invoice_date\": \"08-07-2023\", \"security_number\": \"278034900717841\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision - Biofinity Toric (6) - 14.5/8.7 - BOITE DE 6\", \"quantity\": 2, \"date_of_service\": \"08-07-2023\", \"mandatory_coverage\": null, \"amount\": 96.0}, {\"description\": \"CooperVision - Biofinity Toric (6) - 14.5/8.7 - BOITE DE 6\", \"quantity\": 2, \"date_of_service\": \"08-07-2023\", \"mandatory_coverage\": null, \"amount\": 96.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27619324_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"642620512\", \"doctor_name\": null, \"total_billed\": 150.0, \"bill_paid\": true, \"amount_paid\": 150.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PIGNAULT Malvyna\", \"beneficiary_dob\": null, \"invoice_date\": \"13-07-2023\", \"security_number\": null, \"invoice_issuer\": \"FQSC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"JNJ (ATO) ACUVUE\\u00ae OASYS 1-Day 90P, diam : 14.3, ray : 8.5, souple, Journalier, Rem: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 75.0}, {\"description\": \"JNJ (ATO) ACUVUE\\u00ae OASYS 1-Day 90P, diam : 14.3, ray : 8.5, souple, Journalier, Rem: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27620389_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752601302\", \"prescripteur_finess_number\": \"750065039\", \"doctor_name\": \"CENTRE ACCES VISION PARIS 8\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 160.0, \"remaining_payment\": 0, \"insured_name\": \"TOUSSAINT REMY\", \"insured_dob\": \"17-04-1977\", \"beneficiary_name\": \"TOUSSAINT REMY\", \"beneficiary_dob\": \"17-04-1977\", \"invoice_date\": \"08-07-2023\", \"security_number\": \"177048400713431\", \"invoice_issuer\": \"Optique de la Madeleine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"One DAY Acuvue Moist Multi 1B\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"One DAY Acuvue Moist Multi 1B\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27621196_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131105066\", \"rpps_number\": \"10004356566\", \"finess_number\": \"132665456\", \"doctor_name\": \"POLGE Serge-frederic\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 50.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lucas SANTIAGO\", \"beneficiary_dob\": \"17-04-2000\", \"invoice_date\": \"04-07-2023\", \"security_number\": \"1000430007120 38\", \"invoice_issuer\": \"SASU AM OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Coop\\u00e9ration France- 1800 Route des Cr\\u00eates-FR 06560 Sophia Antipolis 6 x 5 KRYS MENSUELLE Lentille Souple. Fr\\u00e9quent Port Prolongu\\u00e9. Conditionnement: 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25.0}, {\"description\": \"Coop\\u00e9ration France- 1800 Route des Cr\\u00eates-FR 06560 Sophia Antipolis 6 x 5 KRYS MENSUELLE Lentille Souple. Fr\\u00e9quent Port Prolongu\\u00e9. Conditionnement: 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27646193_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27646193_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 89.7, \"bill_paid\": true, \"amount_paid\": 89.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dominique GUINNEFOLLAU\", \"beneficiary_dob\": null, \"invoice_date\": \"19-07-2023\", \"security_number\": \"2 66 11 29 019 057 88\", \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"eversee plus 360ml\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.7}, {\"description\": \"Proclear multifocal (6) (Lentilles sph\\u00e9riques , multifocales) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 42.9}, {\"description\": \"Proclear multifocal (6) (Lentilles sph\\u00e9riques , multifocales) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 42.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 4.9}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -4.9}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -13.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27646630_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"762652154\", \"prescripteur_finess_number\": \"761123504\", \"doctor_name\": \"ATTAL Philippe\", \"total_billed\": 188.0, \"bill_paid\": true, \"amount_paid\": 188.0, \"complementary_coverage\": null, \"client_part\": 188.0, \"insured_name\": \"THOMAS CHRISTEL\", \"insured_dob\": \"25-07-1969\", \"beneficiary_name\": \"THOMAS CHRISTEL\", \"beneficiary_dob\": \"25-07-1969\", \"invoice_date\": \"16-03-2019\", \"security_number\": \"269079201222066\", \"invoice_issuer\": \"LUNET'RY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPH RXM SPHERIC PROG LOW bleut\\u00e9e \\u00d814 (9) Boite de 6\", \"amount\": 113.0}, {\"description\": \"OPHTALMIC RXM SPHERIC bleut\\u00e9e \\u00d813 (8.6) Boite de 6\", \"amount\": 75.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27646631_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"762652154\", \"prescripteur_finess_number\": \"761123504\", \"doctor_name\": \"ATTAL Philippe\", \"total_billed\": 208.0, \"bill_paid\": true, \"amount_paid\": 208.0, \"complementary_coverage\": null, \"client_part\": 208.0, \"insured_name\": \"THOMAS CHRISTEL\", \"insured_dob\": \"25-07-1969\", \"beneficiary_name\": \"THOMAS CHRISTEL\", \"beneficiary_dob\": \"25-07-1969\", \"invoice_date\": \"30-07-2020\", \"security_number\": \"269079201222066\", \"invoice_issuer\": \"LUNET'RY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPH RXM SPHERIC PROG LOW bleut\\u00e9e \\u00d814 (9) Boite de 6\", \"amount\": 123.0}, {\"description\": \"OPHTALMIC RXM SPHERIC bleut\\u00e9e \\u00d813 (8.6) Boite de 6 jazz bleu\", \"amount\": 85.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27647325_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"942612987\", \"prescripteur_finess_number\": \"940026214\", \"doctor_name\": \"OOPHTALYA CHOISY LE ROI\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"complementary_coverage\": null, \"client_part\": 60.0, \"insured_name\": \"KIUSI ALBERT\", \"insured_dob\": \"05-04-1983\", \"beneficiary_name\": \"KIUSI ALBERT\", \"beneficiary_dob\": \"05-04-1983\", \"invoice_date\": \"11-07-2022\", \"security_number\": \"183049931201501\", \"invoice_issuer\": \"Optic Alain\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOFLENS 59 \\u00d814 2 (8.6) Boite de 6\", \"amount\": 60.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27648332_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"770025526\", \"rpps_number\": null, \"finess_number\": \"752604710\", \"doctor_name\": \"centre acces vision serris\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 60.0, \"remaining_payment\": 0.0, \"insured_name\": \"Emilie KHOR\", \"insured_dob\": \"27-12-1987\", \"beneficiary_name\": \"Emilie KHOR\", \"beneficiary_dob\": \"27-12-1987\", \"invoice_date\": \"21-05-2023\", \"security_number\": \"287128708525980\", \"invoice_issuer\": \"VISION CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1-DAY ACUVUE\\u00ae MOIST 90P -0.50 N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 60.0}, {\"description\": \"1-DAY ACUVUE\\u00ae MOIST 90P +0.50 N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 0.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27651227_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27651227_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"total_billed\": 173.99, \"bill_paid\": true, \"amount_paid\": 173.99, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fabrice CARROI\", \"beneficiary_dob\": null, \"invoice_date\": \"06-07-2023\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Toric Multifocal Corrections OEIL DROIT\", \"quantity\": 1, \"amount\": 129.95}, {\"description\": \"Biofinity Multifocal Corrections OEIL GAUCHE\", \"quantity\": 1, \"amount\": 44.04}], \"doctor_name\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27651465_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27651465_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"75261700\", \"doctor_name\": null, \"total_billed\": 148.6, \"bill_paid\": true, \"amount_paid\": 148.6, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie GARDY\", \"beneficiary_dob\": null, \"invoice_date\": \"05-07-2023\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"DAILIES TOTAL 1 For Astigmatism (30) (toriques) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 37.9}, {\"description\": \"DAILIES TOTAL 1 For Astigmatism (30) (toriques) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 37.9}, {\"description\": \"Oxysept 1 Step 3x300ml + 90c\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 34.9}, {\"description\": \"Proclear 1 day (30) (Lentilles sph\\u00e9riques) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 4.9}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -4.9}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -8.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27654057_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"221018419\", \"rpps_number\": null, \"finess_number\": \"222651358\", \"doctor_name\": \"YANNIE SEBILLE-KERNAUDOUR\", \"total_billed\": 63.0, \"bill_paid\": true, \"amount_paid\": 63.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 63.0, \"remaining_payment\": 0.0, \"insured_name\": \"M. LE MADEC Alan\", \"insured_dob\": null, \"beneficiary_name\": \"M. LE MADEC Alan\", \"beneficiary_dob\": null, \"invoice_date\": \"27-07-2023\", \"security_number\": \"1 67 10 22 070 243\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique LANNION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - DAILIES ALL DAY COMFORT 90L Incolore Dur\\u00e9e de vie Journali\\u00e8re (PU:63.00\\u20ac)SoupleAlcon Vision CareBotte De 90 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 63.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27654482_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 102.22, \"bill_paid\": true, \"amount_paid\": 102.22, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. R\\u00e9gis ROBERT\", \"beneficiary_dob\": null, \"invoice_date\": \"27-07-2023\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biotrue For Presbyopia (30) (Sph\\u00e9rique , Multifocale) Oeil droit\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 59.8}, {\"description\": \"Biotrue For Presbyopia (30) (Sph\\u00e9rique , Multifocale) Oeil gauche\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 59.8}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -5.9}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -17.38}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27655301_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"781074547\", \"rpps_number\": \"10000749852\", \"finess_number\": \"782665921\", \"doctor_name\": \"PAILLARD Daniel\", \"total_billed\": 128.0, \"bill_paid\": true, \"amount_paid\": 128.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"beneficiary_name\": \"M. MARTINEZ MATHIEU\", \"beneficiary_dob\": \"02-05-1994\", \"invoice_date\": \"29-07-2023\", \"security_number\": null, \"invoice_issuer\": \"OPTIC 2000\", \"currency\": \"EUR\", \"items\": [{\"description\": \"COOPERVISION - MyDay (90) - 14.2/8.4 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 64.0}, {\"description\": \"COOPERVISION - MyDay (90) - 14.2/8.4 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 64.0}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27658074_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132651753\", \"prescripteur_finess_number\": \"131189177\", \"doctor_name\": \"TOUBOUL Denis\", \"total_billed\": 150.0, \"bill_paid\": true, \"amount_paid\": 150.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 150.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAHOUDEAU Fatiha\", \"beneficiary_dob\": \"09-04-1958\", \"invoice_date\": \"12-07-2023\", \"security_number\": \"258049935451991\", \"invoice_issuer\": \"PHIL'OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPHTAMIC HR TORIC \\u00d814.4 (8.7)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"OPHTAMIC HR TORIC \\u00d814.4 (8.7)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27660322_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"202645172\", \"doctor_name\": null, \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DIONISIO Michael\", \"beneficiary_dob\": null, \"invoice_date\": \"03-07-2023\", \"security_number\": null, \"invoice_issuer\": \"SARL CAP OPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ALC (ATO) DAILIES AquaComfort Plus 90L, diam : 14, ray : 8.7, souple, Journalier, Rem: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 27025158_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Attestation de droits RO-25935660-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ksnip_20250625-171328_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/facture-engie-page-3-349x500_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB_Notaire_Vendeur-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/287b78bf-1861-467f-973f-21aad6be26f1_livret_de_famille_Humez_25022024-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27661030_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27661030_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"doctor_name\": null, \"total_billed\": 115.16, \"bill_paid\": true, \"amount_paid\": 115.16, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mr Younes BALLOUK\", \"beneficiary_dob\": null, \"invoice_date\": \"17-08-2022\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Toric Corrections OEIL GAUCHE\", \"quantity\": 2, \"amount\": 57.58}, {\"description\": \"Biofinity Toric Corrections OEIL DROIT\", \"quantity\": 2, \"amount\": 57.58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27661189_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"201409513\", \"rpps_number\": null, \"finess_number\": \"202645420\", \"doctor_name\": \"Dr PASQUALINI Francois\", \"total_billed\": 178.0, \"bill_paid\": true, \"amount_paid\": 178.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Mme. LEFEVRE ISABELLE\", \"insured_dob\": \"25-03-1957\", \"beneficiary_name\": \"LEFEVRE ISABELLE\", \"beneficiary_dob\": \"25-03-1957\", \"invoice_date\": \"21-07-2023\", \"security_number\": \"257037506810566\", \"invoice_issuer\": \"Furiani Optique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Dailies Total 90 Alcon - DAILIES TOTAL 1 90L - 14.1/8.5 - BOITE DE 90\", \"quantity\": 1, \"amount\": 89.0}, {\"description\": \"Dailies Total 90 Alcon - DAILIES TOTAL 1 90L - 14.1/8.5 - BOITE DE 90\", \"quantity\": 1, \"amount\": 89.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27663036_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27663036_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"932668057\", \"prescripteur_finess_number\": \"750063844\", \"doctor_name\": \"SAINT CYR SANTE\", \"total_billed\": 360.0, \"bill_paid\": true, \"amount_paid\": 360.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 360.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KADOUR YASMIN\", \"beneficiary_dob\": \"26-09-2007\", \"invoice_date\": \"09-06-2023\", \"security_number\": \"277019935240562\", \"invoice_issuer\": \"OPTIQUE SAINT OUEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 soflens dailies disposable 90 hors tips\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 180.0}, {\"description\": \"2 soflens dailies disposable 90 hors tips\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 180.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27663184_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27663184_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132601790\", \"prescripteur_finess_number\": \"131751521\", \"doctor_name\": \"Thomas THIBAULT\", \"total_billed\": 200.0, \"bill_paid\": true, \"amount_paid\": 200.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 200, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AHYERRE Justine\", \"beneficiary_dob\": \"03-06-2004\", \"security_number\": \"269020602910372\", \"invoice_date\": \"26-06-2023\", \"invoice_issuer\": \"ALAIN AFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Toric XR (6) \\u00d814.5 (8.7)\", \"quantity\": 1, \"amount\": 100.0}, {\"description\": \"Biofinity Toric XR (6) \\u00d814.5 (8.7)\", \"quantity\": 1, \"amount\": 100.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27664783_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"741705701\", \"rpps_number\": \"10003442000\", \"finess_number\": \"742600885\", \"doctor_name\": \"HOJABR Amir Theodore\", \"total_billed\": 69.0, \"bill_paid\": true, \"amount_paid\": 69.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 69.0, \"remaining_payment\": 0.0, \"insured_name\": \"M. CABROL SYLVAIN\", \"insured_dob\": \"06-11-1986\", \"beneficiary_name\": \"M. CABROL SYLVAIN\", \"beneficiary_dob\": \"06-11-1986\", \"invoice_date\": \"21-07-2023\", \"security_number\": \"186112636203925\", \"invoice_issuer\": \"Optic 2000\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision - DIA SIHY X6 - 14/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 34.5}, {\"description\": \"CooperVision - DIA SIHY X6 - 14/8.6 - BOITE DE 6\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 34.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27665533_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"920036886\", \"rpps_number\": null, \"finess_number\": \"922620430\", \"doctor_name\": \"instant vision\", \"total_billed\": 92.0, \"bill_paid\": true, \"amount_paid\": 92.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 92.0, \"remaining_payment\": null, \"insured_name\": \"GLANDOR ALICE\", \"insured_dob\": \"30-06-1960\", \"beneficiary_name\": \"GLANDOR ALICE\", \"beneficiary_dob\": \"30-06-1960\", \"invoice_date\": \"26-07-2023\", \"security_number\": \"260069710511992\", \"invoice_issuer\": \"ROBBE OPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1-DAY ACUVUE\\u00ae MOIST 30P\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 46.0}, {\"description\": \"1-DAY ACUVUE\\u00ae MOIST 30P\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 46.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27665789_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27665789_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"832620090\", \"doctor_name\": null, \"total_billed\": 94.0, \"bill_paid\": true, \"amount_paid\": 94.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BACHOT Sebastien\", \"beneficiary_dob\": null, \"invoice_date\": \"13-07-2023\", \"security_number\": null, \"invoice_issuer\": \"ATOL COGOLIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ALC (ATO) Dailies Aquacomfort Plus 90l, diam : 14, ray : 8.7, souple, journalier, Ren: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.0}, {\"description\": \"ALC (ATO) Dailies Aquacomfort Plus 90l, diam : 14, ray : 8.7, souple, journalier, Ren: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27667744_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"591758495\", \"rpps_number\": \"10100951200\", \"finess_number\": \"592640262\", \"doctor_name\": \"Dr GOGNEAUX\", \"total_billed\": 102.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"M. REYNAERT BENJAMIN\", \"insured_dob\": \"21-09-1982\", \"beneficiary_name\": \"M. REYNAERT BENJAMIN\", \"beneficiary_dob\": \"21-09-1982\", \"invoice_date\": \"06-05-2023\", \"security_number\": \"182095951224227\", \"invoice_issuer\": \"DINVUE SARL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision - Biofinity Toric 6L - 14.5/8.7 - BOITE DE 6\", \"quantity\": 1, \"amount\": 51.0}, {\"description\": \"CooperVision - Biofinity Toric 6L - 14.5/8.7 - BOITE DE 6\", \"quantity\": 1, \"amount\": 51.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27668729_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27668729_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 66.7, \"bill_paid\": true, \"amount_paid\": 66.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laurence ROUSSEY\", \"beneficiary_dob\": null, \"invoice_date\": \"12-07-2023\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"eversee plus 360ml\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.7}, {\"description\": \"SofLens For Astigmatism (6) (toriques) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.9}, {\"description\": \"SofLens For Astigmatism (6) (toriques) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 4.9}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -4.9}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -12.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27669574_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"750060394\", \"rpps_number\": null, \"finess_number\": \"832696892\", \"doctor_name\": \"NASR Mayssa\", \"total_billed\": 54.0, \"bill_paid\": true, \"amount_paid\": 54, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 54.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SEBASTIEN CANNICCIONI\", \"beneficiary_dob\": \"23-02-1977\", \"invoice_date\": \"03-08-2023\", \"security_number\": \"177028313719344\", \"invoice_issuer\": \"OPTIQUE OPERA sarl pro vision\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1DAY ACUVUE MOIST 30L N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.0}, {\"description\": \"1DAY ACUVUE MOIST 30L N\\u00b0 Lot 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27670019_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"932634215\", \"prescripteur_finess_number\": \"750010555\", \"doctor_name\": \"LAM Delphine\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAILLARD Helene\", \"beneficiary_dob\": null, \"invoice_date\": \"21-12-2022\", \"security_number\": null, \"invoice_issuer\": \"BISMUTH FRERES OPTICIENS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CIBA VISION DAILIES TOTAL 1 90L \\u00d814.1 (8.5) Boite de 90 lentilles\", \"quantity\": 1, \"amount\": 80.0}, {\"description\": \"CIBA VISION DAILIES TOTAL 1 90L \\u00d814.1 (8.5) Boite de 90 lentilles\", \"quantity\": 1, \"amount\": 80.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27670038_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27670038_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"592653489\", \"prescripteur_finess_number\": \"591970843\", \"doctor_name\": \"DASSIO Davide\", \"total_billed\": 90.0, \"bill_paid\": true, \"amount_paid\": 90.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 90.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POTIER Lya\", \"beneficiary_dob\": \"14-04-2004\", \"invoice_date\": \"26-07-2023\", \"security_number\": \"204045935096497\", \"invoice_issuer\": \"Optique BLANDIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acuvue Oasys with Hydraclear plus 12L \\u00d814.0 (8.4)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}, {\"description\": \"Acuvue Oasys with Hydraclear plus 12L \\u00d814.0 (8.4)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27670915_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"592620074\", \"prescripteur_finess_number\": \"591173505\", \"doctor_name\": \"MONTAGNE\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DAHMANE HAMID\", \"insured_dob\": \"02-09-1981\", \"beneficiary_name\": \"DAHMANE HAMID\", \"beneficiary_dob\": \"02-09-1981\", \"invoice_date\": \"13-07-2023\", \"security_number\": \"181095912201968\", \"invoice_issuer\": \"Regard de moi...\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ONE MOIST AST 30\", \"quantity\": 1, \"amount\": 35.0}, {\"description\": \"ONE MOIST AST 30\", \"quantity\": 1, \"amount\": 35.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27671269_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27671269_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"932658057\", \"doctor_name\": \"SAINT CYR SANTE\", \"prescripteur_finess_number\": \"750063844\", \"total_billed\": 380.0, \"bill_paid\": true, \"amount_paid\": 380.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KADOUR NAOUEL\", \"beneficiary_dob\": \"23-01-1977\", \"invoice_date\": \"06-06-2023\", \"security_number\": \"277019935240562\", \"invoice_issuer\": \"OPTIQUE SAINT OUEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 DAILIES TOTAL ONE ASTIGMATIS 30 HORS TIPS\", \"quantity\": 2, \"amount\": 380.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27672722_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752610444\", \"doctor_name\": null, \"total_billed\": 150.0, \"bill_paid\": true, \"amount_paid\": 150.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"ANDRIANTSOA MISANTSOA\", \"insured_dob\": \"13-12-1985\", \"beneficiary_name\": \"ANDRIANTSOA MISANTSOA\", \"beneficiary_dob\": \"13-12-1985\", \"invoice_date\": \"03-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Optique mutualiste LACHAMBEAUDIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OEIL DROIT - JOHNSON & JOHNSON - 1-DAY ACUVUE MOIST ASTIGM90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 75.0}, {\"description\": \"OEIL GAUCHE - JOHNSON & JOHNSON - 1-DAY ACUVUE MOIST ASTIGM90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27672725_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752610444\", \"doctor_name\": null, \"total_billed\": 150.0, \"bill_paid\": true, \"amount_paid\": 150.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 150.0, \"remaining_payment\": null, \"insured_name\": \"ANDRIANTSOA MISANTSOA\", \"insured_dob\": \"13-12-1985\", \"beneficiary_name\": \"ANDRIANTSOA MISANTSOA\", \"beneficiary_dob\": \"13-12-1985\", \"invoice_date\": \"03-07-2023\", \"security_number\": null, \"invoice_issuer\": \"ECOUTER VOIR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OEIL DROIT - JOHNSON & JOHNSON - 1MA011 - 1-DAY ACUVUE MOIST ASTIGM90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 75.0}, {\"description\": \"OEIL GAUCHE - JOHNSON & JOHNSON - 1MA011 - 1-DAY ACUVUE MOIST ASTIGM90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27673604_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"141099838\", \"rpps_number\": null, \"finess_number\": \"752614958\", \"doctor_name\": \"GENERIQUE MEDECIN\", \"total_billed\": 100.8, \"bill_paid\": true, \"amount_paid\": 100.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PAPELIER Manon\", \"beneficiary_dob\": null, \"invoice_date\": \"05-05-2023\", \"security_number\": \"2 89 11 92 023 047\", \"invoice_issuer\": \"GrandOptical PARIS REPUBLIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - 1-DAY ACUVUE\\u00ae MOIST\\u00ae vente par 30 Dur\\u00e9e de vie:Journali\\u00e8re (PU:16,80\\u20ac)SoupleJohnson & JohnsonBoite De 30 Lentilles\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.4}, {\"description\": \"Lentille G - 1-DAY ACUVUE\\u00ae MOIST\\u00ae vente par 30 Dur\\u00e9e de vie:Journali\\u00e8re (PU:16,80\\u20ac)SoupleJohnson & JohnsonBoite De 30 Lentilles\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.4}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27675236_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"691095582\", \"rpps_number\": null, \"finess_number\": \":692699291\", \"doctor_name\": \"ROUANET PASCAL\", \"total_billed\": 150.1, \"bill_paid\": true, \"amount_paid\": 150.1, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"AUBERT AXEL\", \"insured_dob\": \"14-02-1984\", \"beneficiary_name\": \"AUBERT AXEL\", \"beneficiary_dob\": \"14-02-1984\", \"invoice_date\": \"13-07-2023\", \"security_number\": \"1840214047039 53\", \"invoice_issuer\": \"I&D OPTICIENS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Plus Hydraglyde for Astigmatism 6L\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 62.3}, {\"description\": \"Air Optix Plus Hydraglyde for Astigmatism 6L\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 62.3}, {\"description\": \"Produits Lentilles\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25.5}, {\"description\": \"NOVACEL CONTACT BINOVA INTENSITY PACK\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27678390_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"381052059\", \"rpps_number\": \"10002988946\", \"finess_number\": \"382602894\", \"doctor_name\": \"ORSET Pierre\", \"total_billed\": 180.0, \"bill_paid\": true, \"amount_paid\": 180.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"M. MARTENON ALLAN\", \"insured_dob\": \"16-02-1997\", \"beneficiary_name\": \"M. MARTENON ALLAN\", \"beneficiary_dob\": \"16-02-1997\", \"invoice_date\": \"15-07-2023\", \"security_number\": \"197023842129695\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson & Johnson Medical SAS - ACUVUE\\u00ae OASYS with HYDRACLEAR\\u00ae Plus 12P - 14/8.4 - BOITE DE 12\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Johnson & Johnson Medical SAS - ACUVUE\\u00ae OASYS with HYDRACLEAR\\u00ae Plus 12P - 14/8.4 - BOITE DE 12\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27679188_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752610444\", \"doctor_name\": null, \"total_billed\": 150.0, \"bill_paid\": true, \"amount_paid\": 150.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 0.0, \"client_part\": 150.0, \"remaining_payment\": 150.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ANDRIANTSOA MISANTSOA\", \"beneficiary_dob\": \"13-12-1985\", \"invoice_date\": \"03-07-2023\", \"security_number\": \"285129933305092\", \"invoice_issuer\": \"ECOUTER VOIR Optique mutualiste LACHAMBEAUDIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OEIL DROIT -4.75 (-2.25) Axe -20\\u00b0 Add :0 JOHNSON & JOHNSON - 1MA011 - 1-DAY ACUVUE MOIST ASTIGMA90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 75.0}, {\"description\": \"OEIL GAUCHE -4.00 (-1.75) Axe -160\\u00b0 Add :0 JOHNSON & JOHNSON - 1MA011 - 1-DAY ACUVUE MOIST ASTIGMA90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27679600_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751938804\", \"rpps_number\": \"10000794072\", \"finess_number\": \"752603480\", \"doctor_name\": \"Dr BEGUE Jean Francois\", \"total_billed\": 90.0, \"bill_paid\": true, \"amount_paid\": 90.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"RETIF STEPHANIE\", \"insured_dob\": \"06-12-1977\", \"beneficiary_name\": \"RETIF STEPHANIE\", \"beneficiary_dob\": \"06-12-1977\", \"invoice_date\": \"17-06-2023\", \"security_number\": \"277129402803980\", \"invoice_issuer\": \"Optic 2000 Auteuil\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ALCON - Dia confort DAC+ X30 - 14/8.7 - BOITE DE 30\", \"quantity\": 3, \"date_of_service\": \"17-06-2023\", \"mandatory_coverage\": 12.0, \"amount\": 45.0}, {\"description\": \"ALCON - Dia confort DAC+ X30 - 14/8.7 - BOITE DE 30\", \"quantity\": 3, \"date_of_service\": \"17-06-2023\", \"mandatory_coverage\": 12.0, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27680116_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": \"011807252\", \"rpps_number\": \"10100055208\", \"finess_number\": null, \"doctor_name\": \"Pascal ROUANET\", \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BERNARD Marie-line\", \"beneficiary_dob\": null, \"invoice_date\": \"25-07-2023\", \"security_number\": null, \"invoice_issuer\": \"AINVISION\", \"currency\": null, \"items\": [{\"description\": \"ALCON VISION CARE - AIR OPTIX plus HydraGlyde\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"ALCON VISION CARE - AIR OPTIX plus HydraGlyde for ASTIGMATISM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27680987_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132657230\", \"doctor_name\": null, \"total_billed\": 92.0, \"bill_paid\": true, \"amount_paid\": 92.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIVIERE Mickael\", \"beneficiary_dob\": null, \"invoice_date\": \"28-07-2023\", \"security_number\": null, \"invoice_issuer\": \"SARL LILTI VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPH OPHTALMIK HR 1 DAY SOL diam : 14.1; ray : 8.6; soupie, journalier, R\\u00e9v: 1 j/an (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 46.0}, {\"description\": \"OPH OPHTALMIK HR 1 DAY SOL diam : 14.1; ray : 8.6; soupie, journalier, R\\u00e9v: 1 j/an (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 46.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27681102_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"771713351\", \"rpps_number\": null, \"finess_number\": \"772605531\", \"doctor_name\": \"LINDA HRARAT\", \"total_billed\": 115.0, \"bill_paid\": true, \"amount_paid\": 115.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"PATOUX St\\u00e9phanie\", \"insured_dob\": null, \"beneficiary_name\": \"NOCUM Lea\", \"beneficiary_dob\": null, \"invoice_date\": \"29-07-2023\", \"security_number\": \"1 73 01 77 284 057\", \"invoice_issuer\": \"GrandOptical CLAYE-SOUILLY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - TOTAL 30 6L Incolore Dur\\u00e9e de vie:Mensuelle (PU:57,50\\u20ac)Souple/Alcon Vision Care/Bo\\u00eete De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 57.5}, {\"description\": \"Lentille G - TOTAL 30 6L Incolore Dur\\u00e9e de vie:Mensuelle (PU:57,50\\u20ac)Souple/Alcon Vision Care/Bo\\u00eete De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 57.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27684124_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"prescripteur_finess_number\": null, \"doctor_name\": null, \"total_billed\": 19.95, \"bill_paid\": true, \"amount_paid\": 19.95, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lopez VIRGINIE\", \"beneficiary_dob\": null, \"invoice_date\": \"18-07-2023\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Corrections OEIL GAUCHE Boite de 6 / Diam\\u00e8tre : 14,00 / Rayon : 8.60 / Puissance : -1,50\", \"quantity\": 1, \"date_of_service\": \"18-07-2023\", \"amount\": 19.95}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27686118_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27686118_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"741019095\", \"rpps_number\": \"10003546578\", \"finess_number\": \"742600026\", \"doctor_name\": \"Jean-Fran\\u00e7ois RAYNAUD\", \"total_billed\": 460.0, \"bill_paid\": true, \"amount_paid\": 460.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. CHEVALIER KEVIN\", \"beneficiary_dob\": null, \"invoice_date\": \"25-07-2023\", \"security_number\": null, \"invoice_issuer\": \"L A O LABORATOIRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LENTILLE TOP LAO\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 230.0}, {\"description\": \"LENTILLE TOP LAO\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 230.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27686489_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27686489_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"932658057\", \"prescripteur_finess_number\": \"750063844\", \"doctor_name\": \"SAINT CYR SANTE\", \"total_billed\": 360.0, \"bill_paid\": true, \"amount_paid\": 360.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 360.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KADOUR MEHDI\", \"beneficiary_dob\": \"17-07-2005\", \"invoice_date\": \"06-06-2023\", \"security_number\": \"277019935240562\", \"invoice_issuer\": \"OPTIQUE SAINT OUEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOFLENS DAILIES DISPOSABLES 90 HORS TIPS\", \"quantity\": 2, \"date_of_service\": \"06-06-2023\", \"mandatory_coverage\": null, \"amount\": 180.0}, {\"description\": \"SOFLENS DAILIES DISPOSABLES 90 HORS TIPS\", \"quantity\": 2, \"date_of_service\": \"06-06-2023\", \"mandatory_coverage\": null, \"amount\": 180.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27687795_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"352612477\", \"doctor_name\": null, \"total_billed\": 63.98, \"bill_paid\": true, \"amount_paid\": 63.98, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Valerie AGNEL\", \"beneficiary_dob\": null, \"invoice_date\": \"09-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Vision Direct\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Dailies Total 1 90 Pack Oeil : Gauche Rayon : 8.5 Diam\\u00e8tre : 14.1 Puissance : -1.50\", \"quantity\": 90, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27691223_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"932634215\", \"prescripteur_finess_number\": \"750010555\", \"doctor_name\": \"LAM Delphine\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAILLARD Helene\", \"beneficiary_dob\": null, \"invoice_date\": \"21-12-2022\", \"security_number\": null, \"invoice_issuer\": \"BISMUTH FRERES OPTICIENS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CIBA VISION DAILIES TOTAL 1 90L \\u00d814.1 (8.5) Boite de 90 lentilles\", \"quantity\": 1, \"amount\": 80.0}, {\"description\": \"CIBA VISION DAILIES TOTAL 1 90L \\u00d814.1 (8.5) Boite de 90 lentilles\", \"quantity\": 1, \"amount\": 80.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27694297_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"472801398\", \"prescripteur_adeli_number\": \"471020909\", \"rpps_number\": null, \"finess_number\": \"472696178\", \"doctor_name\": \"Dr DAURIN Jean\", \"total_billed\": 41.0, \"bill_paid\": true, \"amount_paid\": 41.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"NAKACHE JOSETTE\", \"insured_dob\": \"05-02-1953\", \"beneficiary_name\": \"NAKACHE JOSETTE\", \"beneficiary_dob\": \"05-02-1953\", \"invoice_date\": \"08-08-2023\", \"security_number\": \"2 53 02 99 353 726 30\", \"invoice_issuer\": \"ALAIN AFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Alcon - Dailies AquaComfort Plus Multifocal HIGH 30L - 14/8.7 - BOITE DE 30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 41.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27694301_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27694301_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"doctor_name\": null, \"total_billed\": 114.9, \"bill_paid\": true, \"amount_paid\": 114.9, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mathys GONZALO\", \"beneficiary_dob\": null, \"invoice_date\": \"30-07-2023\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"MyDay Daily disposable 90 Corrections OEIL DROIT Boite de 90 / Diam\\u00e8tre : 14,20 / Rayon : 8.40 / Puissance : -3,00\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 57.45}, {\"description\": \"MyDay Daily disposable 90 Corrections OEIL GAUCHE Boite de 90 / Diam\\u00e8tre : 14,20 / Rayon : 8.40 / Puissance : -3,25\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 57.45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27696929_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"782652341\", \"prescripteur_finess_number\": null, \"doctor_name\": null, \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. GARCON JEAN LOUIS\", \"beneficiary_dob\": \"12-06-1979\", \"invoice_date\": \"23-12-2022\", \"security_number\": \"179067858602208\", \"invoice_issuer\": \"ALAINAFFLELOU PHILOPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Coopervision - Eph\\u00e9m\\u00e8re Confort Silicone Hydrogel 30L - BOITE DE 30\", \"quantity\": 2, \"date_of_service\": \"23-12-2022\", \"amount\": 30.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27699488_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"972641872\", \"doctor_name\": null, \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 20.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOURSALO Stephane\", \"beneficiary_dob\": null, \"invoice_date\": \"20-06-2023\", \"security_number\": null, \"invoice_issuer\": \"Maneo St Pierre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE OASYS WITH HYDRACLEAR PLUS 6 (N/A, Journalier, 7 jours, par 6, Dia.:14, Ray.:8.40, Sph.:-3.25, Cyl.:0.00, Axe:0, Add:0.00)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27699662_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"950044651\", \"rpps_number\": \"10001239689\", \"finess_number\": \"312622921\", \"doctor_name\": \"Dr LEVY Patrick\", \"total_billed\": 220.0, \"bill_paid\": true, \"amount_paid\": 128.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 92.0, \"client_part\": 128.0, \"remaining_payment\": null, \"insured_name\": \"SANCHEZ STEPHANIE\", \"insured_dob\": \"07-05-1981\", \"beneficiary_name\": \"SANCHEZ STEPHANIE\", \"beneficiary_dob\": \"07-05-1981\", \"invoice_date\": \"03-06-2023\", \"security_number\": \"281050912201646\", \"invoice_issuer\": \"Optic 2000 Caz\\u00e8res\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision - Dia Intensity Torique (30) - 14.3/8.6 - BOITE DE 30\", \"quantity\": 5, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 110.0}, {\"description\": \"CooperVision - Dia Intensity Torique (30) - 14.3/8.6 - BOITE DE 30\", \"quantity\": 5, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 110.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27700117_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NSOTA MBANGO Thomas\", \"total_billed\": 100, \"bill_paid\": true, \"amount_paid\": 100, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MME HARASYMOWICZ LAETITIA\", \"beneficiary_dob\": null, \"invoice_date\": \"18-08-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OPHTALMOLOGIE Docteurs NSOTA MBANGO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Forfait adaptation lentilles\", \"quantity\": null, \"date_of_service\": \"18-08-2023\", \"mandatory_coverage\": null, \"amount\": 100}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27700403_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"331713172\", \"rpps_number\": null, \"finess_number\": \"642620629\", \"doctor_name\": \"SANDRINE COSTA-BURGUETE\", \"total_billed\": 41.2, \"bill_paid\": true, \"amount_paid\": 41.2, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHATEAURAYNAUD Marion\", \"beneficiary_dob\": null, \"invoice_date\": \"21-08-2023\", \"security_number\": \"2940294052273\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique PAU LESCAR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - iWEAR OXYGEN ASTIGMATISM Boite de 3 Dur\\u00e9e de vie:Mensuelle (PU:17,90\\u20ac)SimpleCoopervisionBoite De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.9}, {\"description\": \"Lentille G - iWEAR OXYGEN ASTIGMATISM Boite de 3 Dur\\u00e9e de vie:Mensuelle (PU:17,90\\u20ac)SimpleCoopervisionBoite De 3 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.9}, {\"description\": \"iWear Multibalance 240ml/AV1\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 6.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27700513_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132601790\", \"prescripteur_finess_number\": \"131202392\", \"doctor_name\": \"VITTE Serge\", \"total_billed\": 136.0, \"bill_paid\": true, \"amount_paid\": 136.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 136.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DAL-COLLETTO Jean Pascal\", \"beneficiary_dob\": \"24-07-1953\", \"invoice_date\": \"29-07-2023\", \"security_number\": \"153072003303967\", \"invoice_issuer\": \"ALAINAFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1-DAY ACUVUE\\u00ae MOIST 90P \\u00d814.2 (8.5)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 136.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27707015_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752614057\", \"doctor_name\": null, \"total_billed\": 60.8, \"bill_paid\": true, \"amount_paid\": 60.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"M. CAUDOUX Xavier\", \"insured_dob\": null, \"beneficiary_name\": \"M. CAUDOUX Xavier\", \"beneficiary_dob\": null, \"invoice_date\": \"22-08-2023\", \"security_number\": \"1 71 07 75 112 196\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique PARIS SAINT LAZARE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - ACUVUE\\u00ae OASYS\\u00ae for ASTIGMATISM vente par 6 Dur\\u00e9e de vie:Bi-mensuelle (PU 30,40\\u20ac)Scuple Johnson & JohnsonBoite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.4}, {\"description\": \"Lentille G - ACUVUE\\u00ae OASYS\\u00ae with HYDRACLEAR\\u00ae Plus vente par 6 Dur\\u00e9e de vie:Bi-mensuelle (PU 20,40\\u20ac)Scuple Johnson & JohnsonBoite De 6 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.4}, {\"description\": \"CARTE FIDELITE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 10.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27708923_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"622698066\", \"prescripteur_finess_number\": \"591812755\", \"doctor_name\": \"SABATIER MA MATHIEU\", \"total_billed\": 51.1, \"bill_paid\": true, \"amount_paid\": 51.1, \"complementary_coverage\": null, \"client_part\": 51.1, \"remaining_payment\": 0, \"insured_name\": \"DEBERT ROMAIN\", \"insured_dob\": \"20-09-2005\", \"beneficiary_name\": \"DEBERT ROMAIN\", \"beneficiary_dob\": \"20-09-2005\", \"invoice_date\": \"07-06-2022\", \"security_number\": \"175095929502021\", \"invoice_issuer\": \"Optic de La Lys J. BOYAVALLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson & Johnson ACUVUE OASYS HYDRACLEAR PLUS 12L 4J \\u00d814 (8.4) Boite de 12\", \"amount\": 51.1}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27709440_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"782664478\", \"doctor_name\": null, \"total_billed\": 159.0, \"bill_paid\": true, \"amount_paid\": 159.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Eric DETANTE\", \"insured_dob\": \"21-12-1962\", \"beneficiary_name\": \"Eric DETANTE\", \"beneficiary_dob\": \"21-12-1962\", \"invoice_date\": \"18-08-2023\", \"security_number\": \"162127511441769\", \"invoice_issuer\": \"LES OPTICIENS KRYS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ophthalmic Compagnie-33 rue des Vanesses-FR-94420 Villepinte OPHTAMIC HR PROG LOW 7,40<-K0n<+8,10 / Addition LOW: jusqu'\\u00e0 +2,25d / Acuit\\u00e9 visuelle Haute R\\u00e9solution / Pr\\u00e9coniser la solution Jazz AquaSensitive. Quantit\\u00e9: 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.5}, {\"description\": \"Ophthalmic Compagnie-33 rue des Vanesses-FR-94420 Villepinte OPHTAMIC HR PROG LOW 7,40<-K0n<+8,10 / Addition LOW: jusqu'\\u00e0 +2,25d / Acuit\\u00e9 visuelle Haute R\\u00e9solution / Pr\\u00e9coniser la solution Jazz AquaSensitive. Quantit\\u00e9: 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 79.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27709682_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27709682_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"prescripteur_finess_number\": null, \"doctor_name\": null, \"total_billed\": 49.7, \"bill_paid\": true, \"amount_paid\": 49.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Miguel MORICHEAU\", \"beneficiary_dob\": null, \"invoice_date\": \"15-08-2023\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE SNC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Plus Hydraglyde Corrections OEIL GAUCHE\", \"quantity\": 1, \"amount\": 24.85}, {\"description\": \"Air Optix Plus Hydraglyde Corrections OEIL DROIT\", \"quantity\": 1, \"amount\": 24.85}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27709720_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"911012201\", \"rpps_number\": \"10100669661\", \"finess_number\": \"912600574\", \"doctor_name\": \"JOUVE Lea\", \"total_billed\": 141.0, \"bill_paid\": true, \"amount_paid\": 141.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"M. CORVAGLIO ANTONIO\", \"insured_dob\": \"18-08-2000\", \"beneficiary_name\": \"M. CORVAGLIO ANTONIO\", \"beneficiary_dob\": \"18-08-2000\", \"invoice_date\": \"01-08-2023\", \"security_number\": \"100089122304997\", \"invoice_issuer\": \"ALAINAFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson & Johnson - 1 Day Acuvue\\u00ae Moist\\u00ae 90L - 14.2/8.5 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.5}, {\"description\": \"Johnson & Johnson - 1 Day Acuvue\\u00ae Moist\\u00ae 90L - 14.2/8.5 - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27712494_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752602631\", \"doctor_name\": null, \"total_billed\": 38.0, \"bill_paid\": true, \"amount_paid\": 38.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"BIHAN YOHAN\", \"insured_dob\": \"07-04-2001\", \"beneficiary_name\": \"BIHAN YOHAN\", \"beneficiary_dob\": \"07-04-2001\", \"invoice_date\": \"29-12-2022\", \"security_number\": \"264047718300620\", \"invoice_issuer\": \"Optique Jeanne D'arc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMO - ACUVUE\\u00ae RevitaLens Flacon 360ml + 1 \\u00e9tui\", \"quantity\": 1, \"amount\": 13.0}, {\"description\": \"MANIPULATION LC\", \"quantity\": 1, \"amount\": 25.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27714273_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"682685748\", \"doctor_name\": null, \"total_billed\": 148.0, \"bill_paid\": true, \"amount_paid\": 148.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. TAVAN LUCA\", \"beneficiary_dob\": \"25-10-1998\", \"invoice_date\": \"19-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Optic 2000 Hirsingue\", \"currency\": \"EUR\", \"items\": [{\"description\": \"JOHNSON & JOHNSON - 1-DAY ACUVUE\\u00ae MOIST\\u00ae - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 74.0}, {\"description\": \"JOHNSON & JOHNSON - 1-DAY ACUVUE\\u00ae MOIST\\u00ae - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 74.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27714295_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"352810824\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maxime Pinto\", \"total_billed\": 75.98, \"bill_paid\": true, \"amount_paid\": 75.98, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARIE-JOSEPHE VIDAL\", \"beneficiary_dob\": null, \"invoice_date\": \"20-08-2023\", \"security_number\": null, \"invoice_issuer\": \"LENSTORE.fr\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Dailies Total 1 Multifocal\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 37.99}, {\"description\": \"Dailies Total 1 Multifocal\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 37.99}, {\"description\": \"Colissimo\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27716093_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"062602107\", \"prescripteur_finess_number\": null, \"doctor_name\": null, \"total_billed\": 185.0, \"bill_paid\": true, \"amount_paid\": 185.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUILLEMIN Laetitia\", \"beneficiary_dob\": null, \"invoice_date\": \"10-08-2023\", \"security_number\": \"277071305536680\", \"invoice_issuer\": \"Laboratoire PROCONTACT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact \\u00e0 renouvellement fr\\u00e9quent\", \"amount\": 185.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27716094_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Patrick DESPREZ\", \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laetitia GUILLEMIN\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"CABINET MEDICO-CHIRURGICAL D'OPHTALMOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Multifocal HydraGlyde\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27718398_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"922618913\", \"prescripteur_finess_number\": \"921925665\", \"doctor_name\": \"LALLOUM FRANCK\", \"total_billed\": 116.0, \"bill_paid\": true, \"amount_paid\": 116.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"CHEZY ANNE\", \"insured_dob\": \"20-11-1976\", \"beneficiary_name\": \"CHEZY ANNE\", \"beneficiary_dob\": \"20-11-1976\", \"invoice_date\": \"23-08-2023\", \"security_number\": \"276110848004623\", \"invoice_issuer\": \"OPTICIEN PRIVE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 Acuvue Oasys with Hydraclear plus 6L Incolore \\u00d81 4 (8.4)\", \"quantity\": 1, \"amount\": 58.0}, {\"description\": \"2 Acuvue Oasys with Hydraclear plus 6L Incolore \\u00d81 4 (8.4)\", \"quantity\": 1, \"amount\": 58.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27718628_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"352612477\", \"prescripteur_finess_number\": null, \"doctor_name\": null, \"total_billed\": 201.52, \"bill_paid\": true, \"amount_paid\": 201.52, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Oph\\u00e9lie VIRELIZIER\", \"beneficiary_dob\": null, \"invoice_date\": \"31-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Vision Direct\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acuvue Oasys for Astigmatism\", \"quantity\": 24, \"amount\": 111.96}, {\"description\": \"Acuvue Oasys for Astigmatism\", \"quantity\": 24, \"amount\": 111.96}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/contrat-de-travail-franaais-anglais_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/facture-engie-page-4_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8c0d32a4-a14c-4294-9130-d34cc8fcdffa_Piece_identite__livret_famille-2_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/287b78bf-1861-467f-973f-21aad6be26f1_livret_de_famille_Humez_25022024-3_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/imag222afd2f2q2f22df2d2es_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/AM MP et enfant 2-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/preview-wbp6149-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27719022_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"601008394\", \"rpps_number\": null, \"finess_number\": \"602621518\", \"doctor_name\": \"ESTELLE BELIN-DE SA\", \"total_billed\": 139.2, \"bill_paid\": true, \"amount_paid\": 94.2, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 45, \"client_part\": 94.2, \"remaining_payment\": 0, \"insured_name\": \"CASTAN ELODIE\", \"insured_dob\": null, \"beneficiary_name\": \"CASTAN ELODIE\", \"beneficiary_dob\": null, \"invoice_date\": \"22-04-2023\", \"security_number\": \"2 92 04 12 202 088\", \"invoice_issuer\": \"Grand Optical BEAUVAIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - DAILIES TOTAL 1 90L Incolore Dur\\u00e9e de vie Journali\\u00e8re (PU:69.60\\u20ac) Souple/Alcon Vision Care/Bo\\u00eete De 90 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.6}, {\"description\": \"Lentille G - DAILIES TOTAL 1 90L Incolore Dur\\u00e9e de vie Journali\\u00e8re (PU:69.60\\u20ac) Souple/Alcon Vision Care/Bo\\u00eete De 90 Lentilles\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 69.6}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27719266_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"012600920\", \"doctor_name\": null, \"total_billed\": 180.0, \"bill_paid\": true, \"amount_paid\": 180.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TUIPULOTU Ismael\", \"beneficiary_dob\": null, \"invoice_date\": \"17-07-2023\", \"security_number\": null, \"invoice_issuer\": \"C C Intermarche\", \"currency\": \"EUR\", \"items\": [{\"description\": \"JNJ (ATO) ACUVUE\\u00ae OASYS 1 DAY FOR ASTIGMATISM (gamme myopie) 90P, diam : 14.3, ray : 8.5, souple, Journalier, Ren: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"JNJ (ATO) ACUVUE\\u00ae OASYS 1 DAY FOR ASTIGMATISM (gamme myopie) 90P, diam : 14.3, ray : 8.5, souple, Journalier, Ren: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture des lentilles de contact-27720265_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752615948\", \"prescripteur_finess_number\": null, \"doctor_name\": null, \"total_billed\": 86.0, \"bill_paid\": true, \"amount_paid\": 86.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VINCENT BOCA\", \"beneficiary_dob\": \"17-06-1972\", \"invoice_date\": \"18-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Optical Center\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ALCON AIR OPTIX PLUS HYDRAGLYDE FOR ASTIGMATISM lentilles Sph\\u00e8re : -3.25 - Rayon (BC) : 8.70 - Diam\\u00e8tre (DIA) : 14.50 - Cylindre : -1.25 - Axe : 10.00 - Oeil gauche\", \"quantity\": 1, \"amount\": 43.0}, {\"description\": \"ALCON AIR OPTIX PLUS HYDRAGLYDE FOR ASTIGMATISM lentilles Sph\\u00e8re : -1.75 - Rayon (BC) : 8.70 - Diam\\u00e8tre (DIA) : 14.50 - Cylindre : -2.25 - Axe : 10.00 - Oeil droit\", \"quantity\": 1, \"amount\": 43.0}]}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771477_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590003570\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Justine LEDUC\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. LECOUVEZ Louis\", \"beneficiary_dob\": null, \"invoice_date\": \"08-09-2023\", \"security_number\": \"196095900924077\", \"invoice_issuer\": \"Justine LEDUC- Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"08-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771478_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"850004110\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas COLIN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GALY Henri\", \"beneficiary_dob\": null, \"invoice_date\": \"30-08-2023\", \"security_number\": \"134086613601009\", \"invoice_issuer\": \"Thomas COLIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771479_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"850004110\", \"doctor_name\": \"Thomas COLIN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"insured_name\": null, \"beneficiary_name\": \"GALY Henri\", \"invoice_date\": \"19-08-2023\", \"security_number\": \"134086613601009\", \"invoice_issuer\": \"Thomas COLIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"amount\": 55}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771480_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"510002371\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LABORDERE Audrey\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chantal VERNEY\", \"beneficiary_dob\": \"06-06-1946\", \"invoice_date\": \"01-09-2023\", \"security_number\": \"246063605800866\", \"invoice_issuer\": \"LABORDERE Audrey\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771553_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"2A0000063\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Agostini J\\u00e9r\\u00f4me\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gerard Bouchard\", \"beneficiary_dob\": null, \"invoice_date\": \"28-07-2023\", \"security_number\": \"1540121485001\", \"invoice_issuer\": \"Agostini J\\u00e9r\\u00f4me\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-07-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771557_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000081\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DE ASSUNCAO David\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GENAY FABIEN\", \"beneficiary_dob\": \"28-10-1981\", \"invoice_date\": \"11-09-2023\", \"security_number\": null, \"invoice_issuer\": \"DE ASSUNCAO David\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": null, \"date_of_service\": \"11-09-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771564_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"190000844\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CORNIELLE Fr\\u00e9d\\u00e9ric\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOURGOIN S\\u00e9verine\", \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2023\", \"security_number\": \"271059402817529\", \"invoice_issuer\": \"CORNIELLE Fr\\u00e9d\\u00e9ric\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention ost\\u00e9opathique(s)\", \"quantity\": 1, \"date_of_service\": \"14-09-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771572_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630005072\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Luka FERNANDES\", \"total_billed\": 40.0, \"bill_paid\": true, \"amount_paid\": 40.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emma LAMANILEVE\", \"beneficiary_dob\": \"11-01-2003\", \"invoice_date\": \"08-09-2023\", \"security_number\": \"203016311318225\", \"invoice_issuer\": \"Luka FERNANDES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"08-09-2023\", \"mandatory_coverage\": null, \"amount\": 40.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27771689_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"2A0001129\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUY Fabien\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Meurisse Elisa\", \"beneficiary_dob\": null, \"invoice_date\": \"11-09-2023\", \"security_number\": null, \"invoice_issuer\": \"GUY Fabien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-09-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27772607_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"317818433\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr Crouail Eric\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JOUERY GILBERT\", \"beneficiary_dob\": null, \"invoice_date\": \"31-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Mr Crouail Eric\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27773374_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000453\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Malfay Christophe\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mr COULY Daniel\", \"beneficiary_dob\": null, \"invoice_date\": \"04-09-2023\", \"security_number\": \"1510718033080\", \"invoice_issuer\": \"Malfay Christophe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-09-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27774161_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"280000860\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Rapha\\u00ebl FICINI\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"Mme LEGENDRE Carole\", \"beneficiary_dob\": \"28-08-1973\", \"invoice_date\": \"15-09-2023\", \"security_number\": null, \"invoice_issuer\": \"FICINI Rapha\\u00ebl E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation \\u00e0 mon cabinet pour un traitement ost\\u00e9opathique suite \\u00e0 un bilan avec lombo-sacralgie + \\u00e9paule droite\", \"quantity\": 1, \"date_of_service\": \"15-09-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27775261_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590006029\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lola Fresco\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CLAUW Christelle\", \"beneficiary_dob\": null, \"invoice_date\": \"19-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Lola Fresco\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-09-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27775827_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"380011601\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VITTOZ Lucie\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"beneficiary_name\": \"Jean-Marie FAURE\", \"beneficiary_dob\": \"20-07-1986\", \"invoice_date\": \"08-09-2023\", \"security_number\": null, \"invoice_issuer\": \"VITTOZ Lucie Ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-09-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27775848_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"600001812\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Margaux Gonzalez\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEGUYADER Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Margaux Gonzalez\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"11-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27775877_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640004040\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manon LANDELLE\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MILLET Agla\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"09-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Manon LANDELLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"09-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27776107_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130024219\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florian LAFFORGUE\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Mulette FREDERICK\", \"insured_dob\": null, \"beneficiary_name\": \"Mulette FREDERICK\", \"beneficiary_dob\": null, \"invoice_date\": \"27-07-2023\", \"security_number\": \"1741198818025\", \"invoice_issuer\": \"Florian LAFFORGUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-07-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27776257_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950001024\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fran\\u00e7ois LAMOURET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VANNIER Yvette\", \"beneficiary_dob\": null, \"invoice_date\": \"06-09-2023\", \"security_number\": \"236053531800248\", \"invoice_issuer\": \"Fran\\u00e7ois LAMOURET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27776273_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340004910\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HERNANDEZ Celine\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOSCH Cyril\", \"beneficiary_dob\": \"03-03-1977\", \"invoice_date\": \"23-08-2023\", \"security_number\": null, \"invoice_issuer\": \"C\\u00e9line HERNANDEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de BOSCH Cyril le 23/08/2023\", \"quantity\": 1, \"date_of_service\": \"23-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27776417_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"500001425\", \"rpps_number\": \"10005402960\", \"finess_number\": null, \"doctor_name\": \"Guillaume BAEHR\", \"total_billed\": 65.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 65.0, \"insured_name\": \"Mme Annick BESTILLE\", \"insured_dob\": \"24-04-1952\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"11-09-2023\", \"security_number\": \"252045021811680\", \"invoice_issuer\": \"Mr Guillaume BAEHR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OS 1+DE:65\", \"quantity\": null, \"date_of_service\": \"11-09-2023\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27776469_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001468\", \"rpps_number\": \"10005917132\", \"doctor_name\": \"Jean-Christophe SCHERER\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"insured_name\": \"Christraem Danielle\", \"beneficiary_name\": \"Christraem Danielle\", \"invoice_date\": \"12-09-2023\", \"security_number\": \"149126232108727\", \"invoice_issuer\": \"Mr Jean-Christophe SCHERER E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-09-2023\", \"amount\": 50}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27776512_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"430000687\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FABIEN CROZATIER\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Christian POURCHER\", \"beneficiary_dob\": null, \"invoice_date\": \"12-09-2023\", \"security_number\": \"1540230189012\", \"invoice_issuer\": \"FABIEN CROZATIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-09-2023\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27776613_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810000901\", \"doctor_name\": \"Charlotte GAUTHIER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"insured_name\": null, \"beneficiary_name\": \"VALAT Mathieu\", \"invoice_date\": \"19-09-2023\", \"invoice_issuer\": \"Charlotte GAUTHIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathy session\", \"amount\": 55}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27777187_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930005038\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9dric GENISSEL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ANSQUER Mourice\", \"beneficiary_dob\": null, \"invoice_date\": \"25-08-2023\", \"security_number\": \"135122909000114\", \"invoice_issuer\": \"C\\u00e9dric GENISSEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"25-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27777639_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"600001713\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DESIRA Alexis\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nicolas FRUCHART\", \"beneficiary_dob\": \"08-01-1987\", \"invoice_date\": \"18-09-2023\", \"security_number\": null, \"invoice_issuer\": \"DESIRA Alexis\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27777645_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310000146\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"JULIEN VIGUIE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DE LABACA MAXIME\", \"beneficiary_dob\": \"26-09-1977\", \"invoice_date\": \"19-09-2023\", \"security_number\": null, \"invoice_issuer\": \"JULIEN VIGUIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778049_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"320000250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Fran\\u00e7ois MARTY\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Margin Magali\", \"beneficiary_dob\": null, \"invoice_date\": \"19-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Jean-Fran\\u00e7ois MARTY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"une consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778091_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650001878\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Chlo\\u00e9 Perez\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"FOURIER Pierre\", \"beneficiary_dob\": \"25-12-1977\", \"invoice_date\": \"30-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Chlo\\u00e9 Perez\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation, et suivi de soins d'ost\\u00e9opathie au cours de la m\\u00eame s\\u00e9ance effectu\\u00e9e ce jour\", \"quantity\": 1, \"date_of_service\": \"30-08-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778109_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340004670\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TOGNACCINI Romain\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Calley Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"31-08-2023\", \"security_number\": null, \"invoice_issuer\": \"TOGNACCINI Romain, Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation de soins ost\\u00e9opathiques et/ou des conseils \\u00e0 vis\\u00e9s th\\u00e9rapeutiques\", \"quantity\": 1, \"date_of_service\": \"31-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778112_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"347846495\", \"rpps_number\": \"10005690911\", \"doctor_name\": \"Gr\\u00e9gory AUCLAIR\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alain Bonal\", \"invoice_date\": \"20-07-2023\", \"invoice_issuer\": \"Gr\\u00e9gory AUCLAIR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-07-2023\", \"amount\": 50.0}], \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778142_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330008160\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audric Imbert\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Beninger Patrick\", \"beneficiary_dob\": \"03-08-1977\", \"invoice_date\": \"01-09-2023\", \"security_number\": null, \"invoice_issuer\": \"IMBERT Audric\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778153_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"730000049\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audrey BERTHIER\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"HEEKENG Francis\", \"insured_dob\": null, \"beneficiary_name\": \"HEEKENG Francis\", \"beneficiary_dob\": null, \"invoice_date\": \"15-09-2023\", \"security_number\": \"191099741134532\", \"invoice_issuer\": \"Audrey BERTHIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-09-23\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778165_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330018763\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Valentin Bessaguet\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"JULIEN Virginie\", \"beneficiary_dob\": \"02-10-1976\", \"invoice_date\": \"17-07-2023\", \"security_number\": null, \"invoice_issuer\": \"Mr Valentin Bessaguet El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s)\", \"quantity\": 1, \"date_of_service\": \"17-07-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778240_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350009452\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RENAIS Ad\\u00e9la\\u00efde\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUEGUEN OLIVIER\", \"beneficiary_dob\": \"11-06-1976\", \"invoice_date\": \"14-09-2023\", \"security_number\": \"176063523818481\", \"invoice_issuer\": \"RENAIS Ad\\u00e9la\\u00efde\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27778445_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440000263\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"El FLANDREAU Chlo\\u00eb\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Wetta Marie Jos\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"03-07-2023\", \"security_number\": \"2500767482049\", \"invoice_issuer\": \"El FLANDREAU Chlo\\u00eb\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-07-2023\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27779029_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"250003134\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HANS Xavier\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VENARD Kevin\", \"beneficiary_dob\": null, \"invoice_date\": \"06-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Clinique Ost\\u00e9opathique Besan\\u00e7on\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-09-23\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27780823_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780005534\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sol\\u00e8ne BUISSON\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"BECHU Christopher\", \"beneficiary_dob\": \"04-04-1995\", \"invoice_date\": \"20-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Sol\\u00e8ne BUISSON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781749_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770004547\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUONG Justine\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ANDRIEU Enzo\", \"beneficiary_dob\": null, \"invoice_date\": \"20-09-2023\", \"security_number\": null, \"invoice_issuer\": \"DUONG Justine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"20-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781766_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620002311\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marc GUILBERT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAGNIN Melanie\", \"beneficiary_dob\": null, \"invoice_date\": \"15-09-2023\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OST\\u00c9OPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-09-2323\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781814_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660002189\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Miguel Galan\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bourdon Aurelie\", \"beneficiary_dob\": null, \"invoice_date\": \"20-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Miguel Galan Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781864_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910000819\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Isabelle MOREAU-HAURY\", \"total_billed\": 63, \"bill_paid\": true, \"amount_paid\": 63, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DE SEDE Arnaud\", \"beneficiary_dob\": null, \"invoice_date\": \"19-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Isabelle MOREAU-HAURY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-09-2023\", \"mandatory_coverage\": null, \"amount\": 63}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781874_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930000377\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florence ROSSI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maud DELEAU\", \"beneficiary_dob\": null, \"invoice_date\": \"29-07-2023\", \"security_number\": \"295049306424740\", \"invoice_issuer\": \"Florence ROSSI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"29-07-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27781876_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"070000468\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BORDEREAU Julien\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Philic Aurelie\", \"beneficiary_dob\": null, \"invoice_date\": \"09-08-2023\", \"security_number\": \"284021305612166\", \"invoice_issuer\": \"BORDEREAU Julien Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-08-2023\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27782424_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"347000291\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr Benoit ESCANDE\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BARRANCOS leo\", \"beneficiary_dob\": \"16-12-1995\", \"invoice_date\": \"15-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Dr Benoit ESCANDE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784722_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590009429\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DERUYK C\\u00e9line\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LANIESSE Elodie\", \"beneficiary_dob\": \"23-01-1984\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Ost\\u00e9opathe D.O DERUYK C\\u00e9line\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784754_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"137083218\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandrine Paravisini\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Santiago Hemenegilde\", \"beneficiary_dob\": null, \"invoice_date\": \"30-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Sandrine Paradisini\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784755_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830004693\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Romain ASCENCIO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CELINI Lorenzo\", \"beneficiary_dob\": null, \"invoice_date\": \"14-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Romain ASCENCIO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784756_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784756_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830004271\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Souriau Alexis\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Celeni Lorenzo\", \"beneficiary_dob\": \"23-10-2008\", \"invoice_date\": \"19-07-2023\", \"security_number\": \"283028313717212\", \"invoice_issuer\": \"Mr Souriau Alexis Ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"19-07-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830004271\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Souriau Alexis\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Celeni Ezio\", \"beneficiary_dob\": \"22-05-2014\", \"invoice_date\": \"19-07-2023\", \"security_number\": \"283028313717212\", \"invoice_issuer\": \"Mr Souriau Alexis Ost\\u00e9opathe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"19-07-2023\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784796_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784796_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"357024538\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Patrick CHATEL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JAMES Franck\", \"beneficiary_dob\": null, \"invoice_date\": \"10-07-2023\", \"security_number\": \"170013523819491\", \"invoice_issuer\": \"Patrick CHATEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-07-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"357024538\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Patrick CHATEL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JAMES Franck\", \"beneficiary_dob\": null, \"invoice_date\": \"21-06-2023\", \"security_number\": \"170013523819491\", \"invoice_issuer\": \"Patrick CHATEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"21-06-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784833_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"771037801\", \"rpps_number\": \"10000693068\", \"doctor_name\": \"Marc ROZENBLAT\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_dob\": null, \"invoice_date\": \"25-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Docteur Marc ROZENBLAT\", \"currency\": \"EUR\", \"insured_name\": null, \"items\": [{\"description\": \"S\\u00e9ance de M\\u00e9decine Manuelle et Ost\\u00e9opathie M\\u00e9dicale\", \"date_of_service\": \"25-08-2023\", \"amount\": 50}], \"beneficiary_name\": \"DEBOFFLE Nicolas\", \"beneficiary_dob\": \"21-05-1976\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784856_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"757019179\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Marie GEFFROY\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MALINGRE Thibault\", \"beneficiary_dob\": null, \"invoice_date\": \"06-09-2023\", \"security_number\": \"278065951226307\", \"invoice_issuer\": \"Jean-Marie GEFFROY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-09-2023\", \"mandatory_coverage\": null, \"amount\": 120.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784857_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"757019179\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Marie GEFFROY\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MALINGRE Lucie\", \"beneficiary_dob\": null, \"invoice_date\": \"06-09-2023\", \"security_number\": \"278065951226307\", \"invoice_issuer\": \"Jean-Marie GEFFROY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 120.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784875_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"180000465\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tatiana JEZIORNY\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"HOUE Janine\", \"insured_dob\": null, \"beneficiary_name\": \"HOUE Janine\", \"beneficiary_dob\": null, \"invoice_date\": \"11-09-2023\", \"security_number\": \"249071820704783\", \"invoice_issuer\": \"Tatiana JEZIORNY - OSTEOPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784879_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784879_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"567015268\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"\\u00c9ric MIGNARD\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DABET Andr\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"12-09-2023\", \"security_number\": \"136032910600967\", \"invoice_issuer\": \"\\u00c9ric MIGNARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"567015268\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"\\u00c9ric MIGNARD\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DABET Elisa\", \"beneficiary_dob\": null, \"invoice_date\": \"12-09-2023\", \"security_number\": \"235115609000858\", \"invoice_issuer\": \"\\u00c9ric MIGNARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-09-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784880_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440001428\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emmanuel Angot\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fran\\u00e7ois BABONNEAU\", \"beneficiary_dob\": \"01-05-1965\", \"invoice_date\": \"12-09-2023\", \"security_number\": \"165054410900149\", \"invoice_issuer\": \"Emmanuel Angot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Facture d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-09-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784881_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910000561\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"No\\u00ebl GALON\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JELNONI Mich\\u00e8le\", \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2023\", \"security_number\": null, \"invoice_issuer\": \"No\\u00ebl GALON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784940_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830011367\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manon ATTAF\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AUGER Lionel\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Manon ATTAF\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"14-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-27784971_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910000470\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Isabelle CHANG\", \"total_billed\": 65, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PINDIVIC Herv\\u00e9\", \"beneficiary_dob\": \"20-03-1968\", \"invoice_date\": null, \"security_number\": \"168032912412193\", \"invoice_issuer\": \"Isabelle CHANG Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": null, \"date_of_service\": \"07-09-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-GED C-D22-1182776_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870000197\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florent MILLE\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DOAT Severine\", \"insured_dob\": null, \"beneficiary_name\": \"DOAT Severine\", \"beneficiary_dob\": null, \"invoice_date\": \"16-09-2023\", \"security_number\": \"277128100402501\", \"invoice_issuer\": \"Florent MILLE\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-GED E-J-2759858_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"647004522\", \"rpps_number\": \"10005878896015\", \"finess_number\": null, \"doctor_name\": \"Marc-Andr\\u00e9 DOMERGUE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Nicolas GALLAIS\", \"beneficiary_dob\": null, \"invoice_date\": \"01-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Marc-Andr\\u00e9 DOMERGUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture m\u00e9decine douce-GED E-J22-1328268_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770010387\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathilde Pereira\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HNASSON Kendji\", \"beneficiary_dob\": null, \"invoice_date\": \"28-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Mathilde Pereira - Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-08-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1039316_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1039316_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130010820\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Charlotte MAGNAN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTINET Jean Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"29-12-2020\", \"security_number\": \"162081305576891\", \"invoice_issuer\": \"M\\u00e9dical Center Breteuil\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-12-2020\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1039410_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE AURELIE\", \"beneficiary_dob\": null, \"invoice_date\": \"21-12-2020\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-12-2020\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1041694_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1041694_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330004631\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CORVINO ISABELLE\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAZZIA ANNE LAURE\", \"beneficiary_dob\": null, \"invoice_date\": \"07-09-2020\", \"security_number\": null, \"invoice_issuer\": \"CORVINO ISABELLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-09-2020\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1041724_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1041724_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920002466\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathieu RETIF\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"Florent Marka\", \"insured_dob\": null, \"beneficiary_name\": \"Florent Marka\", \"beneficiary_dob\": null, \"invoice_date\": \"18-12-2020\", \"security_number\": \"259700235000\", \"invoice_issuer\": \"Mathieu RETIF - OSTEOPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie (anamn\\u00e8se, examen clinique et traitement)\", \"quantity\": 1, \"date_of_service\": \"18-12-2020\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1041725_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1041725_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920002466\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathieu RETIF\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARKA Florent\", \"beneficiary_dob\": null, \"invoice_date\": \"28-12-2020\", \"security_number\": null, \"invoice_issuer\": \"Mathieu RETIF - OSTEOPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie (anamn\\u00e8se, examen clinique et traitement)\", \"quantity\": 1, \"date_of_service\": \"28-12-2020\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1055608_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060002847\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Arnaud RAESS\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ENGELMANN Marie Laure\", \"beneficiary_dob\": null, \"invoice_date\": \"27-11-2020\", \"security_number\": \"272127511434289\", \"invoice_issuer\": \"Arnaud RAESS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-11-2020\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1055689_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690008164\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alice BUBBE\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAIVRE Abel\", \"beneficiary_dob\": null, \"invoice_date\": \"05-01-2021\", \"security_number\": \"180066938816416\", \"invoice_issuer\": \"Alice BUBBE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-01-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1062996_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130011216\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BISARAH Vanessa\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LABORDE DANIEL\", \"beneficiary_dob\": null, \"invoice_date\": \"22-12-2020\", \"security_number\": null, \"invoice_issuer\": \"BISARAH Vanessa\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE\", \"quantity\": 1, \"date_of_service\": \"22-12-2020\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/actmariaaafd02qfqdfdfe5024424af_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c0625556-2cb1-42c8-9433-e31274c121a3_Livret_de_famille_hamidouna-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0abfd7b9-9456-4b68-a489-bd990ef9a78e_livret_de_famille-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Certificate of honour for filing all statutory tax returns_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/rib-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1066100_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1066100_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"9C0000344\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mait\\u00e9 BEAUQUIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rodriguez Sergio\", \"beneficiary_dob\": \"14-02-1980\", \"invoice_date\": \"12-11-2020\", \"security_number\": \"1800299437001\", \"invoice_issuer\": \"MGP\", \"currency\": \"EUR\", \"items\": [{\"description\": \"TORCHE PILES PAR 1\", \"quantity\": 1, \"date_of_service\": \"12-11-2020\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1066370_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660003534\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurie GARCIA\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Firen Anthony\", \"beneficiary_dob\": null, \"invoice_date\": \"05-01-2021\", \"security_number\": null, \"invoice_issuer\": \"Laurie GARCIA OSTEOPATHE D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-01-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1066603_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770007714\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Astrid BLISAC\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Kryska Euchette Alison\", \"beneficiary_dob\": \"27-08-2009\", \"invoice_date\": \"22-12-2020\", \"security_number\": \"278109710117913\", \"invoice_issuer\": \"Astrid Blisac\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-12-2020\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1070373_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropratie\", \"adeli_number\": \"2B0000434\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mlle C\\u00e9line VINCENT\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marcelle FEDERICI\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"2450759350178\", \"invoice_issuer\": \"Mlle C\\u00e9line VINCENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite\", \"quantity\": 1, \"date_of_service\": \"11-01-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1071225_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770007490\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Stacy FONTAINE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIORUCCI Yann\", \"beneficiary_dob\": null, \"invoice_date\": \"03-12-2020\", \"security_number\": \"1851091027034\", \"invoice_issuer\": \"Stacy FONTAINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"03-12-2020\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1071226_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770007490\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Stacy FONTAINE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIORUCCI Yann\", \"beneficiary_dob\": null, \"invoice_date\": \"10-12-2020\", \"security_number\": \"01851091027034\", \"invoice_issuer\": \"Stacy FONTAINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"10-12-2020\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1071291_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"450001524\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Clara Herv\\u00e9\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FISCHER Alain\", \"beneficiary_dob\": null, \"invoice_date\": \"23-12-2020\", \"security_number\": \"136045439501856\", \"invoice_issuer\": \"Clara Herv\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-12-2020\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1072243_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440002293\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Iris Cornic\", \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cohard Geule\", \"beneficiary_dob\": null, \"invoice_date\": \"05-01-2021\", \"security_number\": null, \"invoice_issuer\": \"Iris Cornic\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-01-2021\", \"mandatory_coverage\": null, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1073859_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330002395\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LOTTE Alexandre\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE Nicolas\", \"beneficiary_dob\": \"18-07-1972\", \"invoice_date\": \"07-01-2020\", \"security_number\": \"172071601518922\", \"invoice_issuer\": \"LOTTE Alexandre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1078436_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690008164\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alice BUBBE\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAIVRE Abel\", \"beneficiary_dob\": null, \"invoice_date\": \"05-01-2021\", \"security_number\": \"180066938816416\", \"invoice_issuer\": \"Alice BUBBE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-01-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1078555_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"251044236\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Daniel HEITZMANN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": 25, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Pierre Yves ERBETTA\", \"beneficiary_dob\": null, \"invoice_date\": \"23-12-2020\", \"security_number\": \"191102538833771\", \"invoice_issuer\": \"Docteur Jean-Daniel HEITZMANN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale\", \"quantity\": 1, \"date_of_service\": \"23-12-2020\", \"mandatory_coverage\": 25, \"amount\": 25}, {\"description\": \"soins d'osth\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-12-2020\", \"mandatory_coverage\": 0, \"amount\": 35}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1078556_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"251 044236\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Daniel HEITZMANN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": 25, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"Pierre Yves ERBETTA\", \"insured_dob\": null, \"beneficiary_name\": \"Pierre Yves ERBETTA\", \"beneficiary_dob\": null, \"invoice_date\": \"11-01-2021\", \"security_number\": \"191102538833771\", \"invoice_issuer\": \"Docteur Jean-Daniel HEITZMANN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale\", \"quantity\": 1, \"date_of_service\": \"11-01-2021\", \"mandatory_coverage\": 25, \"amount\": 25}, {\"description\": \"soins d'osth\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-01-2021\", \"mandatory_coverage\": 0, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1084027_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770003887\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr Paquin Mickael\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Esnault Vincent\", \"beneficiary_dob\": null, \"invoice_date\": \"04-12-2020\", \"security_number\": null, \"invoice_issuer\": \"Mr Paquin Mickael\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-12-2020\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1090268_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330002395\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LOTTE Alexandre\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. FAURE Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"18-01-2021\", \"security_number\": \"172071601518922\", \"invoice_issuer\": \"LOTTE Alexandre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1092486_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1092486_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350004701\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Luc ROUAUX\", \"total_billed\": 55.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean-Claude EZAN\", \"beneficiary_dob\": null, \"invoice_date\": \"14-12-2020\", \"security_number\": \"152053528810811\", \"invoice_issuer\": \"Jean-Luc ROUAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-12-2020\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1093156_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770006633\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry TECHER\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Romain Even\", \"beneficiary_dob\": \"21-03-1996\", \"invoice_date\": null, \"security_number\": \"196034410958746\", \"invoice_issuer\": \"Thierry TECHER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"11-01-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1098837_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"11298\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Floriane GUILBAUD\", \"total_billed\": 8000, \"bill_paid\": true, \"amount_paid\": 8000, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"PIAZZA Francis\", \"insured_dob\": \"07-11-1950\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"25-02-2020\", \"security_number\": null, \"invoice_issuer\": \"Floriane GUILBAUD\", \"currency\": \"F\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-02-2020\", \"mandatory_coverage\": null, \"amount\": 8000}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1100809_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001955\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathilde Durand\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PAYOL Alexandre\", \"beneficiary_dob\": null, \"invoice_date\": \"06-01-2021\", \"security_number\": \"183038708531628\", \"invoice_issuer\": \"Mathilde Durand\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1101598_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940000771\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Wolfstyn Didier\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Provost Helene\", \"beneficiary_dob\": null, \"invoice_date\": \"15-12-2020\", \"security_number\": \"270076311343973\", \"invoice_issuer\": \"Wolfstyn Didier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-12-2020\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1101746_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"630001303\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Claude VERNHES\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FALCON Monique\", \"beneficiary_dob\": null, \"invoice_date\": \"26-01-2021\", \"security_number\": null, \"invoice_issuer\": \"CENTRE ONSLOW\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"26-01-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1102493_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1102493_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"117006460\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas Roger\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Figols Jerome\", \"beneficiary_dob\": null, \"invoice_date\": \"11-01-2021\", \"security_number\": \"183068116301542\", \"invoice_issuer\": \"Nicolas Roger\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-01-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1102494_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"117006460\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas Roger\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Petit Pierre Marjorie\", \"beneficiary_dob\": null, \"invoice_date\": \"06-01-2021\", \"security_number\": \"287035626019549\", \"invoice_issuer\": \"Nicolas Roger\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-01-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1102499_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130014566\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe BIFULCO\", \"total_billed\": 50.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cyrille FIARDO\", \"beneficiary_dob\": null, \"invoice_date\": \"13-01-2021\", \"security_number\": \"181061305590855\", \"invoice_issuer\": \"Philippe BIFULCO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1105977_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770003887\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Paquin Mickael\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Esnault Vincent\", \"beneficiary_dob\": null, \"invoice_date\": \"04-12-2020\", \"security_number\": null, \"invoice_issuer\": \"Paquin Mickael\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-12-2020\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1106091_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060006277\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anouk EVENE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"caurecastel\", \"insured_dob\": \"15-11-1971\", \"beneficiary_name\": \"FIORUCCI L\\u00e9o\", \"beneficiary_dob\": null, \"invoice_date\": \"19-10-2020\", \"security_number\": \"27113155547374\", \"invoice_issuer\": \"Anouk EVENE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-10-2020\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1106092_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060006277\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anouk EVENE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIORUCCI L\\u00e9o\", \"beneficiary_dob\": null, \"invoice_date\": \"16-11-2020\", \"security_number\": \"271113155547374\", \"invoice_issuer\": \"Anouk EVENE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1106251_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1106251_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"040001281\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabrice GARNIER\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUDON Patricia\", \"beneficiary_dob\": null, \"invoice_date\": \"25-01-2021\", \"security_number\": \"269101310306164\", \"invoice_issuer\": \"Fabrice GARNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-01-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1106674_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"090000035\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bru C\\u00e9line\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LOISEAU FAJADET Sean\", \"insured_dob\": null, \"beneficiary_name\": \"LOISEAU FAJADET Sean\", \"beneficiary_dob\": null, \"invoice_date\": \"06-01-2021\", \"security_number\": \"2841109122038\", \"invoice_issuer\": \"Bru C\\u00e9line\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie pratiqu\\u00e9e sur\", \"quantity\": 1, \"date_of_service\": \"06-01-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1112007_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060010709\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Brice TURREL\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laurent Faraut\", \"beneficiary_dob\": \"03-09-1975\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Brice TURREL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"15-07-2020\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1112584_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870000684\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TROUTAUD MARIE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LESSECHE CLAUDINE\", \"insured_dob\": \"10-06-1976\", \"beneficiary_name\": \"LESSECHE CLAUDINE\", \"beneficiary_dob\": \"10-06-1976\", \"invoice_date\": \"27-01-2021\", \"security_number\": \"276062309603115\", \"invoice_issuer\": \"TROUTAUD MARIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-01-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1112613_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350007860\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lien DOUAGLIN\", \"total_billed\": 40, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FRASLIN Magali\", \"beneficiary_dob\": null, \"invoice_date\": \"11-01-2021\", \"security_number\": null, \"invoice_issuer\": \"Aur\\u00e9lien DOUAGLIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention\", \"quantity\": 1, \"date_of_service\": \"11-01-2021\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1112614_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350007860\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lien DOUAGLIN\", \"total_billed\": 40, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FRASLIN Magali\", \"beneficiary_dob\": null, \"invoice_date\": \"04-01-2021\", \"security_number\": \"269114410905038\", \"invoice_issuer\": \"Aur\\u00e9lien DOUAGLIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention\", \"quantity\": 1, \"date_of_service\": \"04-01-2021\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1113679_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910004522\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florian GARCIA\", \"total_billed\": 55.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MEISSONNIER Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"29-01-2021\", \"security_number\": null, \"invoice_issuer\": \"Mr Florian GARCIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-01-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1114152_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300003910\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"\\u00c9lodie TOM\\u00c9\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gr\\u00e9gory ENARD\", \"beneficiary_dob\": null, \"invoice_date\": \"26-11-2020\", \"security_number\": null, \"invoice_issuer\": \"\\u00c9lodie TOM\\u00c9 Ost\\u00e9opathe DO.TO.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie au cabinet\", \"quantity\": 1.0, \"date_of_service\": \"26-11-2020\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1119174_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Etiopathe\", \"doctor_name\": \"Mathieu LEPRESLE\", \"total_billed\": 50, \"bill_paid\": null, \"invoice_date\": \"21-01-2021\", \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ECOURTEMER Daniel\", \"beneficiary_dob\": null, \"security_number\": \"1480650129059\", \"invoice_issuer\": \"Mathieu LEPRESLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"date_of_service\": \"21-01-2021\", \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1119175_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Etiopathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathieu LEPRESLE\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ECOURTEMER Daniel\", \"beneficiary_dob\": null, \"invoice_date\": \"25-01-2021\", \"security_number\": \"1480650129059\", \"invoice_issuer\": \"Mathieu LEPRESLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"25-01-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1130797_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE AURELIE\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1131706_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1131706_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440002368\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ana\\u00efs JACQUET\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Faric Nauc\", \"insured_dob\": null, \"beneficiary_name\": \"Faric Manc\", \"beneficiary_dob\": null, \"invoice_date\": \"02-02-2021\", \"security_number\": \"276082227824527\", \"invoice_issuer\": \"Ana\\u00efs JACQUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"02-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1140659_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"13 1 13880 2\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean CECCALDI\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BSLINGON Jean Michel\", \"beneficiary_dob\": null, \"invoice_date\": \"25-06-2020\", \"security_number\": \"162101305543073\", \"invoice_issuer\": \"Docteur Jean CECCALDI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1142065_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"740004288\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"REFUS Olivier\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EYMARD Brice\", \"beneficiary_dob\": \"26-01-1984\", \"invoice_date\": \"05-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Cabinet Param\\u00e9dical\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1142179_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910008135\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Justine NO\\u00cbL\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLAHAUT Nathalie\", \"beneficiary_dob\": \"19-04-1973\", \"invoice_date\": \"21-11-2020\", \"security_number\": null, \"invoice_issuer\": \"NO\\u00cbL Justine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1142197_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"170000251\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LARRAGNAGUE \\u00c9lisa\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BRUGGISSER GWENDOLYN\", \"beneficiary_dob\": \"21-10-1985\", \"invoice_date\": \"04-02-2021\", \"security_number\": \"2851075112326\", \"invoice_issuer\": \"LARRAGNAGUE \\u00c9lisa\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": null, \"date_of_service\": \"04-02-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1142334_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1142334_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"040001281\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabrice GARNIER\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUDON Patricia\", \"beneficiary_dob\": null, \"invoice_date\": \"09-02-2021\", \"security_number\": \"269101310306164\", \"invoice_issuer\": \"Fabrice GARNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1142425_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"840002232\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathieu PRUDHOMME\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M PAUMIER Jean Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"30-07-2020\", \"security_number\": \"165081505593964\", \"invoice_issuer\": \"Mathieu PRUDHOMME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"30-07-2020\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1142513_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130010705\", \"doctor_name\": \"TORDJMAN Charlaine\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"insured_name\": null, \"invoice_date\": \"26-01-2021\", \"invoice_issuer\": \"TORDJMAN Charlaine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"amount\": 50.0}], \"beneficiary_name\": \"FERRER Candice\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1148277_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"571031301\", \"rpps_number\": \"10002372935\", \"finess_number\": null, \"doctor_name\": \"Philippe MEYER-BISCH\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. GIUDICI Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"16-12-2020\", \"security_number\": \"165035746324356\", \"invoice_issuer\": \"Docteur Ph. MEYER-BISCH\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-12-2020\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1148278_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"04-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1149026_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE AURELIE\", \"beneficiary_dob\": null, \"invoice_date\": \"08-02-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1158779_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CARRE Eric\", \"total_billed\": 7000, \"bill_paid\": true, \"amount_paid\": 7000, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"FAUCQUEZ Monique\", \"insured_dob\": \"22-06-1952\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"29-09-2020\", \"security_number\": null, \"invoice_issuer\": \"CARRE Eric\", \"currency\": \"F\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-09-2020\", \"mandatory_coverage\": null, \"amount\": 7000}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1165234_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"447850454\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jo\\u00ebl FRADET\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FLOUTLAY Anne Marie\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"02-02-2021\", \"security_number\": \"224012904120118\", \"invoice_issuer\": \"Jo\\u00ebl FRADET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie r\\u00e9alis\\u00e9e et r\\u00e9gl\\u00e9e ce jour.\", \"quantity\": 1, \"date_of_service\": \"02-02-2021\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1169015_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350007860\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lien DOUAGLIN\", \"total_billed\": 40, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FRASLIN Magali\", \"insured_dob\": null, \"beneficiary_name\": \"FRASLIN Magali\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2021\", \"security_number\": \"269114410905638\", \"invoice_issuer\": \"Aur\\u00e9lien DOUAGLIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention\", \"quantity\": 1, \"date_of_service\": \"27-01-2021\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1171146_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1171146_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310005053\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marielle Jullien\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Perrine Fleury\", \"beneficiary_dob\": \"22-06-1984\", \"invoice_date\": \"16-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Marielle Jullien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-02-2021\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1174801_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590000121\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bernard BLONDIAU\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sylvia Marli\\u00e8re\", \"beneficiary_dob\": null, \"invoice_date\": \"22-02-2021\", \"security_number\": \"280115935009075\", \"invoice_issuer\": \"Bernard BLONDIAU OSTEOPATHE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"22-02-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1174802_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590000121\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bernard BLONDIAU\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cedric Marli\\u00e8re\", \"beneficiary_dob\": null, \"invoice_date\": \"19-02-2021\", \"security_number\": \"177085935031202\", \"invoice_issuer\": \"Bernard BLONDIAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"19-02-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1179550_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220003651\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gabrielle Duong\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Boulanger Marie Louise\", \"beneficiary_dob\": null, \"invoice_date\": \"25-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Gabrielle Duong\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"25-02-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1180505_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"120000054\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE RAY CATHERINE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FABRE Daniel\", \"beneficiary_dob\": \"29-04-1951\", \"invoice_date\": \"10-11-2020\", \"security_number\": \"151041214514667\", \"invoice_issuer\": \"Soci\\u00e9t\\u00e9 d'ost\\u00e9opathes Le Ray\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1183772_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"720002625\", \"doctor_name\": \"Christopher BENQUET\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"invoice_date\": \"02-03-2021\", \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lemoine Jean Yves\", \"beneficiary_dob\": null, \"security_number\": \"140126116900692\", \"invoice_issuer\": \"Christopher BENQUET, ost\\u00e9opathe dipl\\u00f4m\\u00e9 (D.O.)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"date_of_service\": \"02-03-2021\", \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1183799_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620004366\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Zoete Claire\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUCOEUR St\\u00e9phane\", \"beneficiary_dob\": null, \"invoice_date\": \"18-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Zoete Claire\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-02-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1183991_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400002044\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BLANC-GONNET Anne\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Erchebaron Daniele\", \"beneficiary_dob\": null, \"invoice_date\": \"26-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Madame BLANC-GONNET Anne\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1184131_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"620001115\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr. Dino Ercolani\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HENON S\\u00e9verine\", \"beneficiary_dob\": null, \"invoice_date\": \"26-02-2021\", \"security_number\": \"271056211923048\", \"invoice_issuer\": \"Dino ERCOLANI D.C. CHIROPRACTEUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1184306_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130017585\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Raffi VACHER\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Naun\\u00e8s Didien\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2021\", \"security_number\": null, \"invoice_issuer\": \"Raffi VACHER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"27-01-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1184307_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130017585\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Raffi VACHER\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Naun\\u00e8s Didien\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Raffi VACHER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"10-02-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1188165_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"117005546\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr POTABES David\", \"total_billed\": 45, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"M JOAQIN FERRANDIZ\", \"insured_dob\": null, \"beneficiary_name\": \"M JOAQIN FERRANDIZ\", \"beneficiary_dob\": null, \"invoice_date\": \"23-02-2021\", \"security_number\": \"1610999134539\", \"invoice_issuer\": \"CENTRE DE KINESITHERAPIE REEDUCATION ET OSTEOPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-02-2021\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1193369_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210000048\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SPINNEWEBER Mathieu\", \"total_billed\": 62.0, \"bill_paid\": true, \"amount_paid\": 62.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAISSUS Pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"01-02-2021\", \"security_number\": \"178071003301608\", \"invoice_issuer\": \"SPINNEWEBER Mathieu\", \"currency\": \"EUR\", \"items\": [{\"description\": \"01 seance osteopathie\", \"quantity\": 1, \"date_of_service\": \"01-02-2021\", \"mandatory_coverage\": null, \"amount\": 69.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1197096_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1197096_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"040000986\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CAMMARATA La\\u00ebtitia\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Bernaud FAUDON\", \"insured_dob\": null, \"beneficiary_name\": \"FAUDON Julie\", \"beneficiary_dob\": null, \"invoice_date\": \"04-03-2021\", \"security_number\": \"165040411211734\", \"invoice_issuer\": \"CAMMARATA La\\u00ebtitia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"04-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1198077_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"597091032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIN S\\u00e9bastien\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"St\\u00e9phane CHOMBARD\", \"beneficiary_dob\": null, \"invoice_date\": \"19-12-2022\", \"security_number\": \"176095928601949\", \"invoice_issuer\": \"FLORIN S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-12-2022\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1198078_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"597091032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIN S\\u00e9bastien\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Aur\\u00e9lie CHOMBARD\", \"beneficiary_dob\": null, \"invoice_date\": \"23-02-2021\", \"security_number\": \"284095959904090\", \"invoice_issuer\": \"FLORIN S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-02-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1199901_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640001400\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"riotte claire\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"fabas jon\", \"beneficiary_dob\": \"02-05-2004\", \"invoice_date\": \"22-02-2021\", \"security_number\": \"104057511255604\", \"invoice_issuer\": \"riotte claire\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation adulte\", \"quantity\": 1, \"date_of_service\": \"22-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1199902_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640001400\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"riotte claire\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"fabas maeva\", \"beneficiary_dob\": \"02-07-2002\", \"invoice_date\": \"01-03-2021\", \"security_number\": \"2020775113429\", \"invoice_issuer\": \"riotte claire\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation adulte\", \"quantity\": 1, \"date_of_service\": \"01-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1203226_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130003460\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"V\\u00e9ronique GARIN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTELLINO RICHARD\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2021\", \"security_number\": \"1561283050372\", \"invoice_issuer\": \"V\\u00e9ronique GARIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1207573_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400002044\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Madame BLANC-GONNET Anne\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Etchebarren Daniele\", \"beneficiary_dob\": null, \"invoice_date\": \"08-03-2021\", \"security_number\": \"258052403704687\", \"invoice_issuer\": \"Madame BLANC-GONNET Anne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 seance d'osteopathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1210226_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110000057\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jacques BERTRAND\", \"total_billed\": 52, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE Karen\", \"beneficiary_dob\": null, \"invoice_date\": \"09-03-2021\", \"security_number\": null, \"invoice_issuer\": \"CENTRE PLENITUDE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-03-2021\", \"mandatory_coverage\": null, \"amount\": 52}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1210372_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330005588\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Perrine DUBOS\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAJKO Alexandre\", \"beneficiary_dob\": null, \"invoice_date\": \"03-09-2020\", \"security_number\": null, \"invoice_issuer\": \"Perrine DUBOS OST\\u00c9OPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1210953_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640000147\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Michel BAZIN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE LAY Genevi\\u00e8ve\", \"beneficiary_dob\": null, \"invoice_date\": \"03-03-2021\", \"security_number\": \"250028718702670\", \"invoice_issuer\": \"Jean-Michel BAZIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1212466_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Faure Amelie\", \"beneficiary_dob\": null, \"invoice_date\": \"03-03-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1212467_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Faure Aurelie\", \"beneficiary_dob\": null, \"invoice_date\": \"09-03-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1212858_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210001715\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mme VIARD Megan\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TAUEQNIER Karine\", \"beneficiary_dob\": null, \"invoice_date\": \"24-02-2021\", \"security_number\": \"2740471153093\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie de l'universit\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-02-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1216961_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"880000260\", \"rpps_number\": \"10005446835\", \"doctor_name\": \"COLNOT Didier\", \"total_billed\": 80, \"bill_paid\": true, \"invoice_date\": \"12-03-2021\", \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FERRO Thomas\", \"beneficiary_dob\": null, \"security_number\": \"183038816014952\", \"invoice_issuer\": \"CABINET D' OSTEOPATHIE ESPACE MAJORELLE\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathe\", \"date_of_service\": \"12-03-2021\", \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1217462_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590000121\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bernard BLONDIAU\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sylvia Marli\\u00e8re\", \"beneficiary_dob\": null, \"invoice_date\": \"10-03-2021\", \"security_number\": \"280115935009075\", \"invoice_issuer\": \"Bernard BLONDIAU OSTEOPATHE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1217517_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dupuy Aubin\", \"beneficiary_dob\": null, \"invoice_date\": \"15-03-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1221348_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780001178\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mikael VAN MOORLEGHEM\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Turban Virginie\", \"beneficiary_dob\": null, \"invoice_date\": \"15-03-2021\", \"security_number\": \"276087218102857\", \"invoice_issuer\": \"Mikael VAN MOORLEGHEM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-03-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1221391_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440009975\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jo\\u00ebl FRADET\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Flodilay Anne Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"04-03-2021\", \"security_number\": \"0224012904120118\", \"invoice_issuer\": \"Jo\\u00ebl FRADET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie r\\u00e9alis\\u00e9e et r\\u00e9gl\\u00e9e ce jour\", \"quantity\": 1, \"date_of_service\": \"04-03-2021\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1224528_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"170000178\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GIBOUT OLIVIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TELLIER Jocelyne\", \"beneficiary_dob\": null, \"invoice_date\": \"08-03-2021\", \"security_number\": \"1540577427002\", \"invoice_issuer\": \"GIBOUT OLIVIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"08-03-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1224543_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650000169\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FURLAN Bruno\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAUNET ADELINE\", \"beneficiary_dob\": null, \"invoice_date\": \"26-02-2021\", \"security_number\": \"290086444503403\", \"invoice_issuer\": \"Mr FURLAN Bruno\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-02-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1228504_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"757012950\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LHUILLERY Florence\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Filoz Fabrice\", \"beneficiary_dob\": null, \"invoice_date\": \"18-03-2021\", \"security_number\": null, \"invoice_issuer\": \"LHUILLERY Florence\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1229983_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"290002435\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CORRE Elodie\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Runavot Jacques\", \"beneficiary_dob\": \"27-05-1947\", \"invoice_date\": \"17-03-2021\", \"security_number\": \"147052920901568\", \"invoice_issuer\": \"CORRE Elodie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"17-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1230684_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770001048\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COSSE G\\u00e9rard\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Devillier Audrey\", \"beneficiary_dob\": null, \"invoice_date\": \"19-03-2021\", \"security_number\": \"2840477131031\", \"invoice_issuer\": \"COSSE G\\u00e9rard\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"19-03-2021\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-1231262_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060011889\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PILLONE Margot\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FANTINO Jeanette\", \"beneficiary_dob\": null, \"invoice_date\": \"09-08-2021\", \"security_number\": null, \"invoice_issuer\": \"PILLONE Margot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie \\u00e0 domicile\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25002334_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000842\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9nald THOUVENIN\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"JEAN Sandrine\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"19-01-2021\", \"security_number\": \"269043306301493\", \"invoice_issuer\": \"R\\u00e9nald THOUVENIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"19-01-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25003607_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130021272\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emeline BRISSE\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIORE Christophe\", \"beneficiary_dob\": null, \"invoice_date\": \"20-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Emeline GRISSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25007101_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000644\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNEAU Emilie\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIRARDEAU Sabrina\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2021\", \"security_number\": \"2 75 12 17 369 11 09\", \"invoice_issuer\": \"BERNEAU Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-03-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25012184_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300004694\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CARIAT William\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SALORD R\\u00e9gine\", \"beneficiary_dob\": null, \"invoice_date\": \"23-03-2021\", \"security_number\": \"253073000702853\", \"invoice_issuer\": \"CARIAT William\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-03-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25017192_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830000758\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent MATEOS\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 60.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MATEOS Cecile\", \"beneficiary_dob\": null, \"invoice_date\": \"26-02-2021\", \"security_number\": \"273108519112471\", \"invoice_issuer\": \"Laurent MATEOS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-02-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25021617_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940000607\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Brigitte Lefevre-Ward\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FERREIRA Dominique\", \"beneficiary_dob\": null, \"invoice_date\": \"19-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Brigitte Lefevre-Ward\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"19-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25027983_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"830003240\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Corinne BAUX\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GARCIA BOUVRES Andy\", \"beneficiary_dob\": null, \"invoice_date\": \"26-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Corinne BAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite chiropractique\", \"quantity\": 1, \"date_of_service\": \"26-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25029729_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Didier GARRIGUES\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FELD Nadime\", \"beneficiary_dob\": null, \"invoice_date\": \"11-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Didier GARRIGUES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite\", \"quantity\": 1, \"date_of_service\": \"11-03-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25029730_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Didier GARRIGUES\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FELD Nadime\", \"beneficiary_dob\": null, \"invoice_date\": \"30-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Didier GARRIGUES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite\", \"quantity\": 1, \"date_of_service\": \"30-03-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB-Binh-Minh-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/355eee5a-5358-43fd-a0b5-ae7d62406996_livretDeFamille-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB-Dung-1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Promesse-dembauche-212x300_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/RIB-TESTNHIDEUX-D-9-4-2025_152113_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 26932272_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Extrait acte de naissance - 26616051-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ORCID_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25033934_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770005213\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie Messager\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": 80.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Martial Chabrol\", \"beneficiary_dob\": \"01-11-1971\", \"invoice_date\": null, \"security_number\": \"171117728801179\", \"invoice_issuer\": \"Marie Messager\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"08-03-2021\", \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25034588_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110002367\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marta Puente Mu\\u00f1oz\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIRGINIE FERNANDEZ\", \"beneficiary_dob\": null, \"invoice_date\": \"12-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Marta Puente Mu\\u00f1oz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-03-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110002367\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marta Puente Mu\\u00f1oz\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIRGINIE FERNANDEZ\", \"beneficiary_dob\": null, \"invoice_date\": \"25-01-2021\", \"security_number\": null, \"invoice_issuer\": \"Marta Puente Mu\\u00f1oz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-01-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25036151_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780002614\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"La\\u00ebtitia ROUSSEL\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAGET Julien\", \"beneficiary_dob\": null, \"invoice_date\": \"07-04-2021\", \"security_number\": \"1820931555391\", \"invoice_issuer\": \"La\\u00ebtitia ROUSSEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"07-04-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25036908_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"170000178\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GIBOUT OLIVIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TELLIER Daniel\", \"beneficiary_dob\": null, \"invoice_date\": \"23-03-2021\", \"security_number\": \"1540577427002\", \"invoice_issuer\": \"GIBOUT OLIVIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"23-03-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25040113_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440002293\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Iris Cornic\", \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COHARD Gale\", \"beneficiary_dob\": null, \"invoice_date\": \"02-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Iris Cornic\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"02-03-2021\", \"mandatory_coverage\": null, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25048435_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"91000496\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sabine COLLINET\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"L'Helguen St\\u00e9phanie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-04-2021\", \"security_number\": \"285112901902265\", \"invoice_issuer\": \"COLLINET Sabine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-04-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25054565_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25054565_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690000302\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Violaine PERRIN\", \"total_billed\": 56, \"bill_paid\": true, \"amount_paid\": 56, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUCHE Yves\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2021\", \"security_number\": \"163021106905896\", \"invoice_issuer\": \"Violaine PERRIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-04-2021\", \"mandatory_coverage\": null, \"amount\": 56}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25055307_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"050000033\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Damien BERARD\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ESPITALLIER Celine\", \"beneficiary_dob\": \"24-05-1989\", \"invoice_date\": \"13-04-2021\", \"security_number\": null, \"invoice_issuer\": \"Damien BERARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-04-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25055888_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210000659\", \"rpps_number\": \"10001835486\", \"finess_number\": null, \"doctor_name\": \"Jean AOUSTON\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": 80.0, \"mandatory_coverage\": 30.0, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"FESSARD CATHERINE\", \"insured_dob\": \"10-10-1977\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"01-12-2020\", \"security_number\": \"277102127802166\", \"invoice_issuer\": \"Cabinet m\\u00e9dico-chirurgical Victor HUGO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation m\\u00e9dicale et soins d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-12-2020\", \"mandatory_coverage\": 30.0, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25058603_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE AURELIE\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25059182_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650000078\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabienne SIGAUD-GARCIA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAVE MICHELLE\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2021\", \"security_number\": \"2540817347266 21\", \"invoice_issuer\": \"Fabienne SIGAUD-GARCIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"seance d'osteopathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25073562_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780001178\", \"doctor_name\": \"mikael VAN MOORLEGHEM\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Turban Virginie\", \"beneficiary_dob\": null, \"invoice_date\": \"25-03-2021\", \"security_number\": \"276087218102857\", \"invoice_issuer\": \"mikael VAN MOORLEGHEM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"date_of_service\": \"25-03-2021\", \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25073913_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220000335\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RODIER Franck\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FONTENEAU KARINE\", \"beneficiary_dob\": \"18-03-1971\", \"invoice_date\": \"10-03-2021\", \"security_number\": \"271035048402274\", \"invoice_issuer\": \"RODIER Franck\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25076023_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950000265\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIERRE-ALEXIS MASSERANN\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAIVRE Cl\\u00e9ment\", \"beneficiary_dob\": null, \"invoice_date\": \"09-02-2021\", \"security_number\": null, \"invoice_issuer\": \"PIERRE-ALEXIS MASSERANN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25076072_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690017595\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr PEILLON Julien\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EVEN Romain\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2021\", \"security_number\": \"196034410958746\", \"invoice_issuer\": \"CENTRE DE KINE DU SPORT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"UNE SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"16-04-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25076150_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"447850454\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jo\\u00ebl FRADET\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Floencay\", \"beneficiary_dob\": null, \"invoice_date\": \"01-04-2021\", \"security_number\": \"224012904120118\", \"invoice_issuer\": \"Jo\\u00ebl FRADET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie r\\u00e9alis\\u00e9e et r\\u00e9gl\\u00e9e ce jour.\", \"quantity\": 1, \"date_of_service\": \"01-04-2021\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25077060_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"260004395\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARRIER Am\\u00e9lie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SANTACRU Patrice\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2021\", \"security_number\": null, \"invoice_issuer\": \"CHARRIER Am\\u00e9lie - Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"17-04-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25082789_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340005867\", \"doctor_name\": \"Ana\\u00efs PONS\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"insured_name\": null, \"insured_dob\": null, \"security_number\": \"2491011225008\", \"invoice_date\": \"29-03-2021\", \"invoice_issuer\": \"Ana\\u00efs PONS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Traitement Ost\\u00e9opathie\", \"date_of_service\": \"29-03-2021\", \"amount\": 50}], \"beneficiary_name\": \"FONGHETTI Marie Antoinette\", \"beneficiary_dob\": \"20-10-1949\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25082790_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340005867\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ana\\u00efs PONS\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie Antoinette FONGHETTI\", \"beneficiary_dob\": \"20-10-1949\", \"invoice_date\": \"12-04-2021\", \"security_number\": \"2491011225008\", \"invoice_issuer\": \"Ana\\u00efs PONS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-04-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25082924_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920000775\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Carole de LANGLAIS\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Valerie GRIALOU\", \"beneficiary_dob\": null, \"invoice_date\": \"09-04-2021\", \"security_number\": null, \"invoice_issuer\": \"Carole de LANGLAIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-04-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25085836_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"900000169\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phane BEPLANSA\", \"total_billed\": 80, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 80, \"insured_name\": \"ELISABETH Magalie\", \"insured_dob\": null, \"beneficiary_name\": \"ELISABETH Magalie\", \"beneficiary_dob\": null, \"invoice_date\": \"18-02-2021\", \"security_number\": \"2680797416303\", \"invoice_issuer\": \"St\\u00e9phane BERLANGA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-02-2021\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25089174_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620002568\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien DIEU\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEJAY Valentin\", \"beneficiary_dob\": \"01-02-1994\", \"invoice_date\": \"08-04-2021\", \"security_number\": \"194026275822135\", \"invoice_issuer\": \"Julien DIEU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Bilan et Traitement Ost\\u00e9opathique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25093559_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"627033780\", \"rpps_number\": \"10005902118\", \"finess_number\": null, \"doctor_name\": \"Monsieur Cornu. K\", \"total_billed\": 52, \"bill_paid\": true, \"amount_paid\": 52, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"GUILLAUME Myl\\u00e8ne\", \"insured_dob\": null, \"beneficiary_name\": \"Mme POQUET Justine\", \"beneficiary_dob\": null, \"invoice_date\": \"26-04-2021\", \"security_number\": \"274076224901135\", \"invoice_issuer\": \"Cabinet M\\u00e9dical SANTE GAK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-04-2021\", \"mandatory_coverage\": null, \"amount\": 52}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25106529_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000644\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNEAU Emilie\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M Faure Miquela\", \"beneficiary_dob\": null, \"invoice_date\": \"17-02-2021\", \"security_number\": null, \"invoice_issuer\": \"BERNEAU Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-02-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25106530_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25106530_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000644\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNEAU Emilie\", \"total_billed\": 50.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M Faure Miquela\", \"beneficiary_dob\": null, \"invoice_date\": \"18-06-2019\", \"security_number\": null, \"invoice_issuer\": \"BERNEAU Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Seance Osteopathie\", \"quantity\": 1, \"date_of_service\": \"18-06-2019\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117295_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117295_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340008473\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MIRALLES Alexandra\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FABRE Audrey\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2021\", \"security_number\": null, \"invoice_issuer\": \"MIRALLES Alexandra\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117297_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117297_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340008473\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MIRALLES Alexandra\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Madame FABRE\", \"beneficiary_dob\": null, \"invoice_date\": \"19-02-2021\", \"security_number\": null, \"invoice_issuer\": \"MIRALLES Alexandra\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117353_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590011201\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUTHOY Maxime\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUQUENOI Herv\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"06-05-2021\", \"security_number\": \"175055959905022\", \"invoice_issuer\": \"DUTHOY Maxime\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"06-05-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117596_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117596_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"850001918\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FABRIGUES Marc\", \"beneficiary_dob\": null, \"invoice_date\": \"21-04-2021\", \"security_number\": \"158115439526705\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117961_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25117961_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910001791\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9dric MILOE\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FALLA Adjanide\", \"insured_dob\": \"13-11-1984\", \"beneficiary_name\": \"FALLA Adjanide\", \"beneficiary_dob\": \"13-11-1984\", \"invoice_date\": \"26-04-2021\", \"security_number\": \"284119710119312\", \"invoice_issuer\": \"C\\u00e9dric MILOE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-04-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25131810_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770004257\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dombrecht Thibault\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fernandez Valentine\", \"beneficiary_dob\": null, \"invoice_date\": \"05-05-2021\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Dombrecht Thibault\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25136270_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25136270_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130013705\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TORDJMAN Charlaine\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"FERRER Anthony\", \"insured_dob\": null, \"beneficiary_name\": \"FERRER Anthony\", \"beneficiary_dob\": null, \"invoice_date\": \"28-04-2021\", \"security_number\": \"188091305510974\", \"invoice_issuer\": \"TORDJMAN Charlaine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-04-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25143750_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25143750_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330000803\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ERIC DIEZ\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. FOLGADO FREDERIC\", \"beneficiary_dob\": null, \"invoice_date\": \"18-01-2021\", \"security_number\": \"174119521903339\", \"invoice_issuer\": \"ERIC DIEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25143751_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25143751_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330000803\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ERIC DIEZ\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FOLGADO FREDERIC\", \"beneficiary_dob\": null, \"invoice_date\": \"26-04-2021\", \"security_number\": \"174119521903339\", \"invoice_issuer\": \"ERIC DIEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25143841_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910001429\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabien GARREAU\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fanny Lemort\", \"beneficiary_dob\": null, \"invoice_date\": \"12-05-2021\", \"security_number\": null, \"invoice_issuer\": \"Fabien GARREAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultations\", \"quantity\": 1, \"date_of_service\": \"12-05-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25143901_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25143901_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770001048\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COSSE G\\u00e9rard\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Eymard Brice\", \"beneficiary_dob\": null, \"invoice_date\": \"10-05-2021\", \"security_number\": null, \"invoice_issuer\": \"COSSE G\\u00e9rard\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"10-05-2021\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25144036_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650000540\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amandine Garrocq\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lapeyre Michel\", \"beneficiary_dob\": null, \"invoice_date\": \"26-03-2021\", \"security_number\": \"181036504701824\", \"invoice_issuer\": \"Amandine Garrocq\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-03-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25144037_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650000540\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amandine Garrocq\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Michel Lapeyre\", \"beneficiary_dob\": null, \"invoice_date\": \"20-04-2021\", \"security_number\": \"181036504701824\", \"invoice_issuer\": \"Amandine Garrocq\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-04-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25145711_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"820000289\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amelie R\\u00e9gnier\", \"total_billed\": 58, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FEDOU Tibery\", \"insured_dob\": \"08-12-1993\", \"beneficiary_name\": \"FEDOU Tibery\", \"beneficiary_dob\": \"08-12-1993\", \"invoice_date\": \"12-04-2021\", \"security_number\": \"193123155545055\", \"invoice_issuer\": \"Amelie R\\u00e9gnier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Informations Consultation\", \"quantity\": null, \"date_of_service\": \"12-04-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25152384_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920001518\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne GUERRAULT\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Romain MORLAES\", \"beneficiary_dob\": null, \"invoice_date\": \"12-05-2021\", \"security_number\": \"181029203504795\", \"invoice_issuer\": \"Anne GUERRAULT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-05-2021\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25152747_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr ROBERT-LANG Christine\", \"total_billed\": 50, \"bill_paid\": true, \"mandatory_coverage\": 30, \"complementary_coverage\": null, \"client_part\": null, \"beneficiary_name\": \"Guy PERK\", \"invoice_date\": \"17-05-2021\", \"invoice_issuer\": \"Docteur Christine ROBERT-LANG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale\", \"quantity\": 1, \"date_of_service\": \"17-05-2021\", \"amount\": 30}, {\"description\": \"soins hom\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-05-2021\", \"amount\": 20}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25161298_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25161298_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"420002339\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TCHAPOUTIAN Pierre Benoit\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Domec Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"20-05-2021\", \"security_number\": \"143013104200723\", \"invoice_issuer\": \"Mr TCHAPOUTIAN Pierre Benoit\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25161622_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Albane Templier\", \"total_billed\": 90, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Michael EXPOSITO\", \"beneficiary_dob\": null, \"invoice_date\": \"23-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Albane Templier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"23-06-2021\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25161623_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Hypnoth\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Albane Templier\", \"total_billed\": 90, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Michael Exposito\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Albane Templier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation hypnoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"10-02-2021\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25164623_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870000940\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elodie DUMAS\", \"total_billed\": 35, \"bill_paid\": true, \"amount_paid\": 35, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUPUY AUBIN\", \"beneficiary_dob\": null, \"invoice_date\": \"14-05-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"Elodie DUMAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-05-2021\", \"mandatory_coverage\": null, \"amount\": 35}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25166322_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130000409\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ESNAULT Gilles\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTINEZ Elisa\", \"beneficiary_dob\": null, \"invoice_date\": \"18-05-2021\", \"security_number\": null, \"invoice_issuer\": \"ESNAULT Gilles\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Nombre d'acte(s) pratiqu\\u00e9(s): 1\", \"quantity\": 1, \"date_of_service\": \"18-05-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25167333_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"597091032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIN S\\u00e9bastien\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"St\\u00e9phane CHOMBART\", \"beneficiary_dob\": null, \"invoice_date\": \"18-05-2021\", \"security_number\": \"176095928601949\", \"invoice_issuer\": \"FLORIN S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-05-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25167334_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"597091032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIN S\\u00e9bastien\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHOMBART Amelie\", \"beneficiary_dob\": null, \"invoice_date\": \"18-05-2021\", \"security_number\": \"284095959904090\", \"invoice_issuer\": \"FLORIN S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-05-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25169425_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25169425_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130003460\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"V\\u00e9ronique GARIN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTELLINO Josiane\", \"beneficiary_dob\": null, \"invoice_date\": \"25-05-2021\", \"security_number\": \"256101305515510\", \"invoice_issuer\": \"V\\u00e9ronique GARIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25169898_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"100000322\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PARENTI Violaine\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EGGER Michele\", \"beneficiary_dob\": \"02-01-1952\", \"invoice_date\": \"18-05-2021\", \"security_number\": \"25201722181014\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie PARENTI Violaine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-05-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25175432_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"9D0001183\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VAUDIN Lo\\u00efc\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Esparon Erich\", \"beneficiary_dob\": null, \"invoice_date\": \"11-05-2021\", \"security_number\": \"920005071000\", \"invoice_issuer\": \"VAUDIN Lo\\u00efc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-05-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25175989_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25175989_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alain Girardon\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": \"Marie FINART\", \"insured_dob\": null, \"beneficiary_name\": \"Marie FINART\", \"beneficiary_dob\": null, \"invoice_date\": \"25-01-2021\", \"security_number\": \"202033960631634\", \"invoice_issuer\": \"Alain Girardon - Ost\\u00e9opathe DO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-01-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alain Girardon\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie FINART\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2021\", \"security_number\": \"202033960631634\", \"invoice_issuer\": \"Alain Girardon - Ost\\u00e9opathe DO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-02-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25176008_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"050000033\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Damien BERARD\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ESPITALLIER Rose\", \"beneficiary_dob\": \"14-09-1961\", \"invoice_date\": \"25-05-2021\", \"security_number\": \"261096121430928\", \"invoice_issuer\": \"Damien BERARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25178581_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950000315\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry LAURENT\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FOGGIA Sandra\", \"beneficiary_dob\": null, \"invoice_date\": \"28-05-2021\", \"security_number\": null, \"invoice_issuer\": \"Thierry LAURENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-05-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25179328_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330034433\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien MACAUD\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLOCH Jacqueline\", \"beneficiary_dob\": null, \"invoice_date\": \"25-05-2021\", \"security_number\": \"242076058900255\", \"invoice_issuer\": \"Julien MACAUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"25-05-21\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25181197_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400000667\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M\\u00e9lody Frecaut\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUVEL XAVIER\", \"beneficiary_dob\": null, \"invoice_date\": \"18-05-2021\", \"security_number\": \"160117115308503\", \"invoice_issuer\": \"M\\u00e9lody Frecaut\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-05-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25182407_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"310004411\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BROUHOUET Anna\\u00eblle\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"RUELLE Martine\", \"insured_dob\": null, \"beneficiary_name\": \"RUELLE Martine\", \"beneficiary_dob\": null, \"invoice_date\": \"11-12-2017\", \"security_number\": \"263038114009975\", \"invoice_issuer\": \"Cabinet Chiropratique Roseraie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en chiropratique\", \"quantity\": 1, \"date_of_service\": \"11-12-2017\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25182408_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25182408_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropratie\", \"adeli_number\": \"310004411\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BROUHOUET Anna\\u00eblle\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"RUELLE Martine\", \"insured_dob\": null, \"beneficiary_name\": \"RUELLE Martine\", \"beneficiary_dob\": null, \"invoice_date\": \"09-11-2017\", \"security_number\": \"263038114009975\", \"invoice_issuer\": \"Cabinet Chiropratique Roseraie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en chiropratique\", \"quantity\": 1, \"date_of_service\": \"09-11-2017\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25182409_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"310004411\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BROUHOUET Anna\\u00eblle\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"RUELLE Martine\", \"insured_dob\": null, \"beneficiary_name\": \"RUELLE Martine\", \"beneficiary_dob\": null, \"invoice_date\": \"27-10-2017\", \"security_number\": \"263038114009975\", \"invoice_issuer\": \"Cabinet Chiropratique Roseraie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en chiropratique\", \"quantity\": 1, \"date_of_service\": \"27-10-2017\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25182410_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"310004411\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BROUHOUET Anna\\u00eblle\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"RUELLE Martine\", \"insured_dob\": null, \"beneficiary_name\": \"RUELLE Martine\", \"beneficiary_dob\": null, \"invoice_date\": \"03-10-2017\", \"security_number\": \"263038114009975\", \"invoice_issuer\": \"Cabinet Chiropratique Roseraie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en chiropratique\", \"quantity\": 1, \"date_of_service\": \"03-10-2017\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25183619_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210001772\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"QUENTIN CHARBONNEAU\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"H\\u00e9not Nathan\", \"beneficiary_dob\": null, \"invoice_date\": \"07-05-2021\", \"security_number\": null, \"invoice_issuer\": \"QUENTIN CHARBONNEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"UNE SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"07-05-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25183658_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830006136\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pierre Etienne DUVAL\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme faivre danielle\", \"beneficiary_dob\": \"05-10-1945\", \"invoice_date\": \"23-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Pierre Etienne DUVAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie draguignan\", \"quantity\": 1, \"date_of_service\": \"23-03-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25184785_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220000582\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BIGOT THIERRY\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie-elise FONTAINE\", \"beneficiary_dob\": null, \"invoice_date\": \"09-04-2021\", \"security_number\": \"273059741615762\", \"invoice_issuer\": \"BIGOT THIERRY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-04-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25186075_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25186075_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"930001862\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sabrina BENZINEB\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Florian EL ARBI\", \"beneficiary_dob\": null, \"invoice_date\": \"26-05-2021\", \"security_number\": \"182121411845494\", \"invoice_issuer\": \"Cabinet Chiropratique Sabrina BENZINEB D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-05-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25187935_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25187935_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"040001281\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabrice GARNIER\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUDON Patricia\", \"beneficiary_dob\": null, \"invoice_date\": \"25-05-2021\", \"security_number\": \"269101310306164\", \"invoice_issuer\": \"Fabrice GARNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"25-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25187936_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25187936_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"040001281\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabrice GARNIER\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUDON Julie\", \"beneficiary_dob\": null, \"invoice_date\": \"28-05-2021\", \"security_number\": \"202091300126180\", \"invoice_issuer\": \"Fabrice GARNIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"FACTURE CONSULTATION OSTEOPATHIE\", \"quantity\": null, \"date_of_service\": \"28-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25190164_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"627022445\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Boquet J\\u00e9r\\u00e9mie\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fontaine Marc\", \"beneficiary_dob\": null, \"invoice_date\": \"02-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Boquet J\\u00e9r\\u00e9mie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9pathie / posturologie\", \"quantity\": 1, \"date_of_service\": \"02-06-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25190194_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25190194_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620003426\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lo\\u00efc Demoury\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M MERLIN Olivier\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2021\", \"security_number\": \"1751262193102\", \"invoice_issuer\": \"Lo\\u00efc Demoury\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-04-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25192273_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670002146\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Adrien BIRY\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FINANCE Mathilde\", \"beneficiary_dob\": \"17-05-1996\", \"invoice_date\": \"01-06-2021\", \"security_number\": \"296056746225909\", \"invoice_issuer\": \"Adrien Biry Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie, Acquitt\\u00e9e\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25192590_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25192590_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Chiropractie\", \"adeli_number\": \"950007963\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Quentin-Fran\\u00e7ois Boissier\", \"total_billed\": 190, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Eriola Marie-Jose\", \"insured_dob\": \"12-07-1961\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"01-04-2021\", \"security_number\": \"261070331003226\", \"invoice_issuer\": \"Quentin-Fran\\u00e7ois Boissier -Chiropracteur\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation de soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"28-01-2021\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"consultation de soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"01-04-2021\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"consultation de soins chiropratiques\", \"quantity\": 1, \"date_of_service\": \"18-02-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25198201_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770003846\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amelia CHUNG\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEVILLIER Audrey\", \"beneficiary_dob\": null, \"invoice_date\": \"17-05-2021\", \"security_number\": \"284047713103190\", \"invoice_issuer\": \"Amelia CHUNG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"17-05-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25198509_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25198509_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300004074\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BOTTREAU Fanchon\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FERRIER Jonathan\", \"beneficiary_dob\": null, \"invoice_date\": \"07-05-2021\", \"security_number\": null, \"invoice_issuer\": \"BOTTREAU Fanchon\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"07-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25198581_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25198581_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660000027\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eric ABEL\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"OLIVA Lauranne\", \"beneficiary_dob\": null, \"invoice_date\": \"27-05-2021\", \"security_number\": null, \"invoice_issuer\": \"Eric ABEL OSTEOPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins ost\\u00e9opathiques\", \"quantity\": 1, \"date_of_service\": \"27-05-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25201373_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000321\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LETORD Philippe\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EYNARD Maxime\", \"beneficiary_dob\": \"25-07-1995\", \"invoice_date\": \"03-06-2021\", \"security_number\": null, \"invoice_issuer\": \"LETORD Philippe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-06-2021\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25203829_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25203829_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640000204\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien PARGADE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MUSDE MINASSON Francis\", \"beneficiary_dob\": \"31-10-1954\", \"invoice_date\": \"08-02-2021\", \"security_number\": \"154106442218065\", \"invoice_issuer\": \"CENTRE MEDICAL Route de Monclar\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-02-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25203887_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25203887_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130015415\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eric TRUCHET\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FACIOTTI BERENGERE\", \"beneficiary_dob\": null, \"invoice_date\": \"29-05-2021\", \"security_number\": \"27078619406713\", \"invoice_issuer\": \"Eric TRUCHET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"29-05-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25224007_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25224007_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"750029001\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline DUPERRAY D.C\", \"total_billed\": 100.0, \"bill_paid\": true, \"amount_paid\": 100.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M BIERNACKI Christophe\", \"beneficiary_dob\": \"19-08-1976\", \"invoice_date\": \"07-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Pauline DUPERRAY D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"premi\\u00e8re visite\", \"quantity\": 1, \"date_of_service\": \"14-04-2021\", \"mandatory_coverage\": null, \"amount\": 100}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25224008_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25224008_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"750029001\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline DUPERRAY D.C\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BIERNACKI Christophe\", \"beneficiary_dob\": \"19-08-1976\", \"invoice_date\": \"26-04-2021\", \"security_number\": null, \"invoice_issuer\": \"Pauline DUPERRAY D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"suivi de consultation\", \"quantity\": 1, \"date_of_service\": \"26-04-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25224009_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25224009_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"750029001\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline DUPERRAY D.C\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M BIERNACKI Christophe\", \"beneficiary_dob\": \"19-08-1976\", \"invoice_date\": \"07-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Pauline DUPERRAY D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"suivi de consultation\", \"quantity\": 1, \"date_of_service\": \"07-06-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25233927_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25233927_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350008470\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laur\\u00e8ne DAVID\", \"total_billed\": 53.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JAUNATRE Catherine\", \"beneficiary_dob\": null, \"invoice_date\": \"27-02-2021\", \"security_number\": \"279024410950324\", \"invoice_issuer\": \"Laur\\u00e8ne DAVID\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"27-02-2021\", \"mandatory_coverage\": null, \"amount\": 53.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25233928_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350008470\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laur\\u00e8ne DAVID\", \"total_billed\": 53.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KLEINBAUER Luc\", \"beneficiary_dob\": null, \"invoice_date\": \"17-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Laur\\u00e8ne DAVID\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"17-02-2021\", \"mandatory_coverage\": null, \"amount\": 53.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25233929_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350005229\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fr\\u00e9d\\u00e9ric PAVOINE\", \"total_billed\": 53, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JAUNATRE Catherine\", \"beneficiary_dob\": null, \"invoice_date\": \"10-03-2021\", \"security_number\": \"279024410950324\", \"invoice_issuer\": \"Fr\\u00e9d\\u00e9ric PAVOINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HONORAIRES D'OSTEOPATHIE\", \"quantity\": null, \"date_of_service\": \"10-03-2021\", \"mandatory_coverage\": null, \"amount\": 53}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25236328_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060008307\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DELMARRE Thierry\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Sandra\", \"beneficiary_dob\": \"03-05-1983\", \"invoice_date\": \"27-03-2021\", \"security_number\": \"283050600402073\", \"invoice_issuer\": \"DELMARRE Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 SEANCE(S) DE NORMALISATIONS VERTEBRALES ET SQUELETTIQUES PAR OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"27-03-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25236329_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060008307\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DELMARRE Thierry\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Sandra\", \"beneficiary_dob\": \"03-05-1983\", \"invoice_date\": \"18-03-2021\", \"security_number\": \"283050600402073\", \"invoice_issuer\": \"DELMARRE Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE(S) DE NORMALISATIONS VERTEBRALES ET SQUELETTIQUES PAR OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"18-03-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25236744_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25236744_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060011889\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PILLONE Margot\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FANTINO Jeannette\", \"beneficiary_dob\": null, \"invoice_date\": \"30-04-2021\", \"security_number\": null, \"invoice_issuer\": \"PILLONE Margot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie \\u00e0 domicile\", \"quantity\": 1, \"date_of_service\": \"30-04-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25239522_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"15-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25239523_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"04-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25239524_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25239524_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Killian FINET\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"18-02-2021\", \"security_number\": null, \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25241935_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25241935_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130000409\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ESNAULT Gilles\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FILLOUX Sandra\", \"beneficiary_dob\": null, \"invoice_date\": \"11-06-2022\", \"security_number\": null, \"invoice_issuer\": \"ESNAULT Gilles\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Nombre d'acte(s) pratiqu\\u00e9(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/creer-bon-de-livraison-antsroute-7_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/modele-cdi_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/facture-mode-demploi-site-internet_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ro_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/00ac2a76-52ec-437a-8477-a573d62ecbe4_Livret_de_Famille_2024_1-2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/comprendre-ma-facture-edf-exemple_F4nHAoq_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/A29dda8c-1fdf25dfdffd54124d_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25245480_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25245480_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660000720\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Matthieu AUGE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fernandez Sandro\", \"beneficiary_dob\": null, \"invoice_date\": \"16-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Matthieu AUGE OSTEOPATHE D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathy session\", \"quantity\": 1, \"date_of_service\": \"16-06-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660000720\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Matthieu AUGE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fernandez Sandro\", \"beneficiary_dob\": null, \"invoice_date\": \"02-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Matthieu AUGE OSTEOPATHE D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathy session\", \"quantity\": 1, \"date_of_service\": \"02-06-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25246953_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25246953_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"131138802\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean CECCALDI\", \"total_billed\": 53, \"bill_paid\": true, \"amount_paid\": 53, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Bichonnou Jean Michel\", \"beneficiary_dob\": null, \"invoice_date\": \"13-06-2021\", \"security_number\": \"162101305543073\", \"invoice_issuer\": \"Docteur Jean CECCALDI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 53}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25247116_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25247116_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210001772\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"QUENTIN CHARBONNEAU\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Henot Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"08-06-2021\", \"security_number\": \"1741003301940\", \"invoice_issuer\": \"QUENTIN CHARBONNEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"UNE SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"08-06-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25248536_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330034433\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien MACAUD\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLOCH Jacqueline\", \"beneficiary_dob\": null, \"invoice_date\": \"15-06-2021\", \"security_number\": \"242076058400255\", \"invoice_issuer\": \"Julien MACAUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"15-06-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25251351_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"450002480\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"F\\u00e9lix ROS\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Annie Jean-Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"02-06-2021\", \"security_number\": null, \"invoice_issuer\": \"F\\u00e9lix ROS - Ost\\u00e9opathe D.O.F.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"02-06-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25257105_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130012529\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M.Vincent Volle\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIEFFE Benoit\", \"beneficiary_dob\": null, \"invoice_date\": \"15-06-2021\", \"security_number\": \"164077511435847\", \"invoice_issuer\": \"M.Vincent Volle\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-06-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25263636_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"170000178\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GIBOUT OLIVIER\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"TELLIER Daniel\", \"insured_dob\": null, \"beneficiary_name\": \"TELLIER Daniel\", \"beneficiary_dob\": null, \"invoice_date\": \"28-05-2021\", \"security_number\": \"1540577427002\", \"invoice_issuer\": \"GIBOUT OLIVIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": null, \"date_of_service\": \"28-05-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25263730_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780006813\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maurane BONDIS\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Denis SEURAT\", \"beneficiary_dob\": null, \"invoice_date\": \"11-05-2021\", \"security_number\": null, \"invoice_issuer\": \"Maurane BONDIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-05-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25265881_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440002293\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Iris Cornic\", \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cohard Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"16-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Iris Cornic\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-06-2021\", \"mandatory_coverage\": null, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25267998_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640003950\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MATHILDE B\\u00c9D\\u00c8RE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HELENE FELTEN\", \"beneficiary_dob\": \"26-05-1977\", \"invoice_date\": \"01-06-2021\", \"security_number\": \"277053523856174\", \"invoice_issuer\": \"MATHILDE B\\u00c9D\\u00c8RE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-06-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25270267_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nans BEAUVALLET\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLORENTIN GUILLEMET Brigitte\", \"beneficiary_dob\": null, \"invoice_date\": \"28-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Nans BEAUVALLET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"intervention(s) en \\u00e9tiopathie\", \"quantity\": 1, \"date_of_service\": \"28-06-2021\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25270268_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pierre-Paul ROUQUETTE\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLORENTIN GUILLEMET Brigitte\", \"beneficiary_dob\": null, \"invoice_date\": \"11-06-2021\", \"invoice_issuer\": \"Pierre-Paul ROUQUETTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"intervention(s) en \\u00e9tiopathie\", \"quantity\": 1, \"date_of_service\": \"11-06-2021\", \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25273058_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000644\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNEAU Emilie\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Faure Kainam\", \"beneficiary_dob\": null, \"invoice_date\": \"27-04-2021\", \"security_number\": null, \"invoice_issuer\": \"BERNEAU Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Seance d'osteopathie\", \"quantity\": 1, \"date_of_service\": \"27-04-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25273059_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000644\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNEAU Emilie\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Faure Julien\", \"beneficiary_dob\": null, \"invoice_date\": \"27-04-2021\", \"security_number\": null, \"invoice_issuer\": \"BERNEAU Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Seance d'osteopathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25273060_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25273060_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000644\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNEAU Emilie\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M Faure Kainam\", \"beneficiary_dob\": null, \"invoice_date\": \"22-07-2020\", \"security_number\": null, \"invoice_issuer\": \"BERNEAU Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25273061_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630000644\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERNEAU Emilie\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M Faure Tiquela\", \"beneficiary_dob\": null, \"invoice_date\": \"22-07-2020\", \"security_number\": null, \"invoice_issuer\": \"BERNEAU Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Seance d'osteopathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25279751_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"640002192\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PHILIPPE PIOT\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Etchebarren Daniele\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"PHILIPPE PIOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"01-04-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25280844_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770004943\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE BATTEUX Jordan\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FEUTRY CORINE\", \"beneficiary_dob\": null, \"invoice_date\": \"13-02-2021\", \"security_number\": \"272085912225315\", \"invoice_issuer\": \"LE BATTEUX Jordan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-02-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25280845_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25280845_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770004943\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE BATTEUX Jordan\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FEUTRY Corine\", \"beneficiary_dob\": null, \"invoice_date\": \"03-04-2021\", \"security_number\": \"272085912225315\", \"invoice_issuer\": \"LE BATTEUX Jordan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-04-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25284266_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"667012975\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bernard VIOLARD\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIRON Anthony\", \"beneficiary_dob\": null, \"invoice_date\": \"01-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Bernard VIOLARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-07-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25290421_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25290421_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300000015\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9my VESSE\", \"total_billed\": 75, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FLORES\", \"insured_dob\": null, \"beneficiary_name\": \"FLORES Antoinette\", \"beneficiary_dob\": null, \"invoice_date\": \"03-06-2021\", \"security_number\": null, \"invoice_issuer\": \"R\\u00e9my VESSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25292676_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE AURELIE\", \"beneficiary_dob\": null, \"invoice_date\": \"05-07-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-07-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25292677_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE AURELIE\", \"beneficiary_dob\": null, \"invoice_date\": \"30-06-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-06-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25293643_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"840000103\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas DEVINE\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": \"30-07-1949\", \"beneficiary_name\": \"CHANDOYSSON jean Paul\", \"beneficiary_dob\": \"30-07-1949\", \"invoice_date\": \"09-11-2021\", \"security_number\": \"149079935259928\", \"invoice_issuer\": \"Nicolas DEVINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"09-11-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25295241_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr Pascal HERLEM\", \"total_billed\": 25.0, \"bill_paid\": true, \"amount_paid\": 25.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laurence MIRAILLES\", \"beneficiary_dob\": \"03-01-1977\", \"invoice_date\": \"07-06-2021\", \"security_number\": \"277011305507284\", \"invoice_issuer\": \"Dr Pascal HERLEM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"07-06-2021\", \"mandatory_coverage\": null, \"amount\": 25.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25300559_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110002367\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marta Puente Mu\\u00f1oz\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIRGINIE FERNANDEZ\", \"beneficiary_dob\": null, \"invoice_date\": \"25-04-2021\", \"security_number\": null, \"invoice_issuer\": \"Marta Puente Mu\\u00f1oz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25302057_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770003846\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amelia CHUNG\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEVILLIER Audrey\", \"beneficiary_dob\": null, \"invoice_date\": \"21-06-2021\", \"security_number\": \"284047713103190\", \"invoice_issuer\": \"Amelia CHUNG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"21-06-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25302242_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25302242_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130014566\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe BIFULCO\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIARDO Cyrille\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"181061305590855\", \"invoice_issuer\": \"Philippe BIFULCO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25302987_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650000078\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabienne SIGAUD-GARCIA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAVE FERNAND\", \"beneficiary_dob\": \"15-01-1947\", \"invoice_date\": \"19-01-2021\", \"security_number\": null, \"invoice_issuer\": \"Fabienne SIGAUD-GARCIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25302988_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25302988_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640003398\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHABBERT Vincent\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fouc Fernand\", \"beneficiary_dob\": \"15-01-1947\", \"invoice_date\": \"15-06-2021\", \"security_number\": null, \"invoice_issuer\": \"CHABBERT Vincent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathic consultation\", \"quantity\": null, \"date_of_service\": \"15-06-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25305731_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770006658\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cindy De Oliveira\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Faivre Nathalie\", \"beneficiary_dob\": null, \"invoice_date\": \"08-07-2021\", \"security_number\": \"272021126205625\", \"invoice_issuer\": \"Cindy De Oliveira\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-07-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25309353_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300000015\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9my VESSE\", \"total_billed\": 75, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLORES Antoinette\", \"beneficiary_dob\": null, \"invoice_date\": \"05-07-2021\", \"security_number\": null, \"invoice_issuer\": \"R\\u00e9my VESSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25309354_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25309354_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300000015\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9my VESSE\", \"total_billed\": 75, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M FLORES Jean-No\\u00ebl\", \"beneficiary_dob\": null, \"invoice_date\": \"09-07-2021\", \"security_number\": null, \"invoice_issuer\": \"R\\u00e9my VESSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25310144_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"600002968\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE BRAS Alexandra\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COLIGNON Arnaud\", \"beneficiary_dob\": null, \"invoice_date\": \"15-07-2021\", \"security_number\": null, \"invoice_issuer\": \"LE BRAS Alexandra\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25310782_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"947001418\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LAHMY Gr\\u00e9gory\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHETARD Roger\", \"beneficiary_dob\": \"16-05-1937\", \"invoice_date\": \"16-07-2021\", \"security_number\": \"137057511025212\", \"invoice_issuer\": \"LAHMY Gr\\u00e9gory\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation et soins d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-07-2021\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25314009_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAURE AURELIE\", \"beneficiary_dob\": null, \"invoice_date\": \"15-07-2021\", \"security_number\": \"282078708528129\", \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-07-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25315620_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400000667\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M\\u00e9lody Frecaut\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUVEL XAVIER\", \"beneficiary_dob\": null, \"invoice_date\": \"28-06-2021\", \"security_number\": \"160117115308503\", \"invoice_issuer\": \"M\\u00e9lody Frecaut\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-06-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25316942_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25316942_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640001608\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LOPEZ ROMAIN\", \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ELGUE ANTHONY\", \"beneficiary_dob\": null, \"invoice_date\": \"11-05-2021\", \"security_number\": null, \"invoice_issuer\": \"LOPEZ ROMAIN OSTEOPATHE DNO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE D'OSTEOPATHIE - Consultation\", \"quantity\": 1, \"date_of_service\": \"11-05-2021\", \"mandatory_coverage\": null, \"amount\": 30.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25322026_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620000588\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Perrine Nolf\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laurent Flament\", \"beneficiary_dob\": null, \"invoice_date\": \"21-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Perrine Nolf\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-07-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25326952_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950003285\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mathias LE Roux\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELARUE Lise\", \"beneficiary_dob\": null, \"invoice_date\": \"11-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Mathieu Roux\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-07-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25330401_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25330401_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340006089\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RIBEIRO C\\u00e9line\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOMEZ Karine\", \"beneficiary_dob\": null, \"invoice_date\": \"18-05-2021\", \"security_number\": \"272113430107805\", \"invoice_issuer\": \"RIBEIRO C\\u00e9line\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-05-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25331768_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220002984\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Redon Aur\\u00e9lie\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chantal Capliez\", \"beneficiary_dob\": null, \"invoice_date\": \"27-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie du val\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HONORAIRES\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25332074_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"630001303\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Claude VERNHES\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FALCON Monique\", \"beneficiary_dob\": null, \"invoice_date\": \"28-07-2021\", \"security_number\": null, \"invoice_issuer\": \"JEAN-CLAUDE VERNHES D.C. CCSP\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"28-07-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25332784_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810000034\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PENDARIES-MENARD Carole\", \"total_billed\": 53, \"bill_paid\": true, \"amount_paid\": 53, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Madame Valentine FERNANDEZ\", \"beneficiary_dob\": null, \"invoice_date\": \"28-07-2021\", \"security_number\": \"282068100401855\", \"invoice_issuer\": \"PENDARIES-MENARD Carole\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Note d'honoraires\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 53}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25338240_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25338240_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"447850454\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jo\\u00ebl FRADET\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLOCH RAY Anne Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"03-06-2021\", \"security_number\": \"224012904120118\", \"invoice_issuer\": \"Jo\\u00ebl FRADET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie r\\u00e9alis\\u00e9e et r\\u00e9gl\\u00e9e ce jour\", \"quantity\": 1, \"date_of_service\": \"03-06-2021\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25338241_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"447850454\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jo\\u00ebl FRADET\", \"total_billed\": 66, \"bill_paid\": true, \"amount_paid\": 66, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLOCH RAY Anne Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"01-07-2021\", \"security_number\": \"224012904120118\", \"invoice_issuer\": \"Jo\\u00ebl FRADET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie r\\u00e9alis\\u00e9e et r\\u00e9gl\\u00e9e ce jour.\", \"quantity\": 1, \"date_of_service\": \"01-07-2021\", \"mandatory_coverage\": null, \"amount\": 66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25338995_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750019010\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Brice MANON\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FOGGIA SANDRO\", \"beneficiary_dob\": null, \"invoice_date\": \"27-07-2021\", \"security_number\": \"159039935114753\", \"invoice_issuer\": \"Brice MANON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"27-07-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25340725_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060008307\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DELMARRE Thierry\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Sandra\", \"beneficiary_dob\": \"03-05-1983\", \"invoice_date\": \"16-07-2021\", \"security_number\": \"283050600402073\", \"invoice_issuer\": \"DELMARRE Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE(S) DE NORMALISATIONS VERTEBRALES ET SQUELETTIQUES PAR OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"16-07-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25340726_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"060008307\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DELMARRE Thierry\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Ludovic\", \"beneficiary_dob\": \"27-04-1979\", \"invoice_date\": \"16-07-2021\", \"security_number\": \"179047511119205\", \"invoice_issuer\": \"DELMARRE Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE(S) DE NORMALISATIONS VERTEBRALES ET SQUELETTIQUES PAR OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"16-07-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25340740_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"680000304\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ludovic GERMONT\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ERMANT Muriel\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2021\", \"security_number\": null, \"invoice_issuer\": \"Ludovic GERMONT\", \"currency\": \"EUR\", \"items\": [{\"description\": null, \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25341280_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001328\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. LAURENT MERIGUET\", \"total_billed\": 62.0, \"bill_paid\": true, \"amount_paid\": 62.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DANIELE ENAULT\", \"beneficiary_dob\": \"15-11-1947\", \"invoice_date\": \"27-04-2021\", \"security_number\": null, \"invoice_issuer\": \"M. LAURENT MERIGUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-04-2021\", \"mandatory_coverage\": null, \"amount\": 62.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25341458_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"317016715\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne-Marie SCHERRER\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fin\\u00e9 Alain\", \"beneficiary_dob\": null, \"invoice_date\": \"12-07-2021\", \"security_number\": \"163053155579796\", \"invoice_issuer\": \"Madame Anne-Marie SCHERRER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-07-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25346392_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640000451\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MALAPLATE J\\u00e9r\\u00f4me\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PEYROUS DANIEL\", \"beneficiary_dob\": \"15-07-1975\", \"invoice_date\": \"03-06-2021\", \"security_number\": null, \"invoice_issuer\": \"MALAPLATE J\\u00e9r\\u00f4me\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OSTEO\", \"quantity\": 1, \"date_of_service\": \"03-06-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25347662_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25347662_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640006912\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DAUDIGNON Benoit\", \"total_billed\": 50.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 50.0, \"remaining_payment\": 50.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LOPPION ALEXIA\", \"beneficiary_dob\": \"01-09-1990\", \"invoice_date\": \"03-08-2021\", \"security_number\": null, \"invoice_issuer\": \"DAUDIGNON Benoit\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HN (NR)\", \"quantity\": 1, \"date_of_service\": \"03-08-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25347945_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340000215\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fr\\u00e9d\\u00e9ric DEGUETTE\", \"total_billed\": 64.0, \"bill_paid\": true, \"amount_paid\": 64.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"OLIVA Lauranne\", \"beneficiary_dob\": null, \"invoice_date\": \"30-07-2021\", \"security_number\": \"200066613626775\", \"invoice_issuer\": \"Fr\\u00e9d\\u00e9ric DEGUETTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"30-07-2021\", \"mandatory_coverage\": null, \"amount\": 64.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25350829_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"820000602\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LECARPENTIER Hugo\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ENNAMLI Jessica\", \"beneficiary_dob\": null, \"invoice_date\": \"29-06-2021\", \"security_number\": \"285099402801849\", \"invoice_issuer\": \"LECARPENTIER Hugo\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation de soins ost\\u00e9opathiques et/ou des conseils \\u00e0 vis\\u00e9e th\\u00e9rapeutiques\", \"quantity\": 1, \"date_of_service\": \"29-06-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25352266_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25352266_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060011889\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PILLONE Margot\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FANTINO Jeannette\", \"beneficiary_dob\": null, \"invoice_date\": \"15-06-2021\", \"security_number\": \"2300221306900216\", \"invoice_issuer\": \"PILLONE Margot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie \\u00e0 domicile\", \"quantity\": 1, \"date_of_service\": \"15-06-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060011889\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PILLONE Margot\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FANTINO Jeannette\", \"beneficiary_dob\": null, \"invoice_date\": \"15-06-2021\", \"security_number\": \"2300221306900216\", \"invoice_issuer\": \"PILLONE Margot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie \\u00e0 domicile\", \"quantity\": 1, \"date_of_service\": \"19-05-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25352522_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25352522_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330028061\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christophe GUIMOND\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FERHATI Aurore\", \"beneficiary_dob\": null, \"invoice_date\": \"03-08-2021\", \"security_number\": \"284121305501868\", \"invoice_issuer\": \"Christophe GUIMOND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-08-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25352901_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25352901_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"647012459\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BITAILLOU Jean-Michel\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROQUES PASCALE\", \"beneficiary_dob\": \"12-05-1967\", \"invoice_date\": \"05-08-2021\", \"security_number\": \"267056443001893\", \"invoice_issuer\": \"BITAILLOU Jean-Michel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Un traitement d'acupuncture et de th\\u00e9rapie manuelle de type ost\\u00e9opathique, selon la m\\u00e9thode Sol\\u00e8re\", \"quantity\": 1, \"date_of_service\": \"05-08-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25356926_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25356926_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"020000147\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Romain POTTERIE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PERSIAUX Marieline\", \"beneficiary_dob\": null, \"invoice_date\": \"15-06-2021\", \"security_number\": 281055954404176, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Romain POTTERIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"19-06-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25357010_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25357010_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"290002740\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HILY Jordan\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"micout Jean\", \"beneficiary_dob\": \"13-04-1934\", \"invoice_date\": \"16-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Gouesnach\", \"currency\": \"EUR\", \"items\": [{\"description\": \"adulte - Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-07-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25357011_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25357011_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"290002740\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HILY Jordan\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"micout Jean\", \"beneficiary_dob\": \"13-04-1934\", \"invoice_date\": \"19-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Gouesnach\", \"currency\": \"EUR\", \"items\": [{\"description\": \"adulte - Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-07-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25358481_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25358481_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"137901229\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe JUBERT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COSTA JOSEPH\", \"beneficiary_dob\": null, \"invoice_date\": \"07-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Philippe JUBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement ost\\u00e9opathique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25358482_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25358482_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"137901229\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe JUBERT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COSTA JOSEPH\", \"beneficiary_dob\": null, \"invoice_date\": \"21-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Philippe JUBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement ost\\u00e9opathique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25358990_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25358990_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"977407113\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PHILIPPE AUGUIE\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GECROS BABAMA\", \"beneficiary_dob\": null, \"invoice_date\": \"01-03-2021\", \"security_number\": null, \"invoice_issuer\": \"PHILIPPE AUGUIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-03-2022\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25360470_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25360470_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930000997\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Naima ABDESLAM\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRONDIN EMILIE\", \"beneficiary_dob\": null, \"invoice_date\": \"10-06-2021\", \"security_number\": null, \"invoice_issuer\": \"Naima ABDESLAM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25360588_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25360588_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590008868\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Rivoal Bertrand\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ferret Maxime\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2021\", \"security_number\": null, \"invoice_issuer\": \"Rivoal Bertrand\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Seance osteopathie\", \"quantity\": 1, \"date_of_service\": \"11-08-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25360589_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25360589_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590008868\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Rivoal Bertrand\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ferret Maxime\", \"beneficiary_dob\": null, \"invoice_date\": \"22-07-2021\", \"security_number\": null, \"invoice_issuer\": \"Rivoal Bertrand\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Seance osteopathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25361051_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25361051_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950000356\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9mi BLANCHOT\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUCHER Severine\", \"beneficiary_dob\": null, \"invoice_date\": \"09-08-2021\", \"security_number\": null, \"invoice_issuer\": \"R\\u00e9mi BLANCHOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-08-2021\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25361117_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25361117_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770003267\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas Beraut\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MYRIAM LAVERGNE\", \"beneficiary_dob\": null, \"invoice_date\": \"12-08-2021\", \"security_number\": \"267124410935844\", \"invoice_issuer\": \"BERAUT Thomas\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-08-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25361118_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25361118_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770003267\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas B\\u00e9raut\", \"total_billed\": 50.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 50.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MYRIAM LAVERGNE\", \"beneficiary_dob\": null, \"invoice_date\": \"30-07-2021\", \"security_number\": \"267124410935844\", \"invoice_issuer\": \"BERAUT Thomas\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25365107_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"490003613\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ANGEBAULT Dylan\", \"total_billed\": 48.0, \"bill_paid\": true, \"amount_paid\": 48.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAVREAU Gunther\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2021\", \"security_number\": null, \"invoice_issuer\": \"ANGEBAULT Dylan - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-08-2021\", \"mandatory_coverage\": null, \"amount\": 48.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25365393_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"EVANNE FRITSCH\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FAUCQUEZ Monique\", \"insured_dob\": \"22-06-1952\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"29-07-2021\", \"security_number\": \"2520621231179\", \"invoice_issuer\": \"Cabinet sant\\u00e9 Victoire EVANNE FRITSCH\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-07-2021\", \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25370411_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940002223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Delphine DIAZ\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ESCUDIERE Mauricette\", \"beneficiary_dob\": null, \"invoice_date\": \"18-08-2021\", \"security_number\": \"233059935344182\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Delphine DIAZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-08-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25370412_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940002223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Delphine DIAZ\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ESCUDIERE Mauricette\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2021\", \"security_number\": \"233059935344182\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Delphine DIAZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-08-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25374417_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310012505\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DIDIER H\\u00e9lo\\u00efse\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIORENTINI Anne-Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"17-08-2021\", \"security_number\": \"2470331582006\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie de saint Orens de Gameville\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation\", \"quantity\": 1, \"date_of_service\": \"17-08-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25375639_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25375639_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"9A0000223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RIGA JEAN PHILIPPE Maritza\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOUCHAREB Sihma\", \"beneficiary_dob\": null, \"invoice_date\": \"12-03-2021\", \"security_number\": \"274014732303045\", \"invoice_issuer\": \"Cabinet Maritza RIGA JEAN PHILIPPE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de BOUCHAREB Sihma\", \"quantity\": 1, \"date_of_service\": \"12-03-2021\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25375641_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25375641_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"9A0000223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RIGA JEAN PHILIPPE Maritza\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ERHARD Franck\", \"beneficiary_dob\": null, \"invoice_date\": \"12-03-2021\", \"security_number\": \"172067511607160\", \"invoice_issuer\": \"Cabinet Maritza RIGA JEAN PHILIPPE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de ERHARD Franck\", \"quantity\": 1, \"date_of_service\": \"12-03-2021\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25375642_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25375642_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"9A0000223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RIGA JEAN PHILIPPE Maritza\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ERHARD Franck\", \"beneficiary_dob\": null, \"invoice_date\": \"04-09-2019\", \"security_number\": \"172067511607160\", \"invoice_issuer\": \"SELASU MRJP\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de ERHARD Franck\", \"quantity\": 1, \"date_of_service\": \"04-09-2019\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25379054_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25379054_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"9A0000223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RIGA JEAN PHILIPPE Maritza\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOUCHAREB Sihma\", \"beneficiary_dob\": null, \"invoice_date\": \"12-03-2021\", \"security_number\": \"274014732303045\", \"invoice_issuer\": \"Cabinet Maritza RIGA JEAN PHILIPPE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de BOUCHAREB Sihma\", \"quantity\": 1, \"date_of_service\": \"12-03-2021\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25379394_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640001103\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PRIGENT Chlo\\u00e9\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CASADEBAIG Paulette\", \"beneficiary_dob\": null, \"invoice_date\": \"28-07-2021\", \"security_number\": \"247046432001090\", \"invoice_issuer\": \"Ossau-Ost\\u00e9o\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires pour une consultation d'ost\\u00e9opathie de CASADEBAIG Paulette le 28/07/2021 \\u00e0 Izeste.\", \"quantity\": 1, \"date_of_service\": \"28-07-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25381708_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750019325\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Niti PAL\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PROVOST HELENE\", \"beneficiary_dob\": null, \"invoice_date\": \"17-08-2021\", \"security_number\": \"270076311343973\", \"invoice_issuer\": \"Niti PAL - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-08-2021\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25381709_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"757013867\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VAFIADES VINCENT\", \"total_billed\": 40.0, \"bill_paid\": true, \"amount_paid\": 40.0, \"mandatory_coverage\": 0.0, \"complementary_coverage\": null, \"client_part\": 40.0, \"remaining_payment\": 0.0, \"insured_name\": \"PROVOST HELENE\", \"insured_dob\": \"29-07-1970\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"26-08-2021\", \"security_number\": \"2700763113439\", \"invoice_issuer\": \"VAFIADES VINCENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HN\", \"quantity\": 1, \"date_of_service\": \"26-08-2021\", \"mandatory_coverage\": 0.0, \"amount\": 40.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382408_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130010663\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Guillaume Escallier-Lachaup\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Filloux Eric\", \"beneficiary_dob\": \"12/02/1970\", \"invoice_date\": \"24-08-2021\", \"security_number\": \"170028715404514\", \"invoice_issuer\": \"Ost\\u00e9opathes D.O.F\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-08-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382451_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130001431\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J\\u00e9rome ROUVIER\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ferhati Aurore\", \"beneficiary_dob\": null, \"invoice_date\": \"13-08-2021\", \"security_number\": \"2841213055018\", \"invoice_issuer\": \"J\\u00e9rome ROUVIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie A\", \"quantity\": 1, \"date_of_service\": \"13-08-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382470_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000842\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9nald THOUVENIN\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JEAN Sandrine\", \"beneficiary_dob\": null, \"invoice_date\": \"07-08-2021\", \"security_number\": \"269043306301493\", \"invoice_issuer\": \"R\\u00e9nald THOUVENIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"07-08-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382471_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000842\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9nald THOUVENIN\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LECLERCQ Ludovic\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"25-08-2021\", \"security_number\": \"199049304814588\", \"invoice_issuer\": \"R\\u00e9nald THOUVENIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"25-08-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382472_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000842\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"R\\u00e9nald THOUVENIN\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LECLERCQ Alexiane\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"17-08-2021\", \"security_number\": \"296119300731596\", \"invoice_issuer\": \"R\\u00e9nald THOUVENIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"traitement ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"17-08-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382477_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marilyn Dutartre\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Monsieur Faivre-Courtot\", \"beneficiary_dob\": null, \"invoice_date\": \"08-06-2021\", \"security_number\": \"169069304824436\", \"invoice_issuer\": \"Marilyn Dutartre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance individuelle de sophrologie\", \"quantity\": 1, \"date_of_service\": \"08-06-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382478_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marilyn Dutartre\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Monsieur Faivre-Courtot\", \"beneficiary_dob\": null, \"invoice_date\": \"23-06-2021\", \"security_number\": \"169069304824436\", \"invoice_issuer\": \"Marilyn Dutartre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance individuelle de sophrologie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25382479_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marilyn Dutartre\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Monsieur Faivre-Courtot\", \"beneficiary_dob\": null, \"invoice_date\": \"06-07-2021\", \"security_number\": \"169069304824436\", \"invoice_issuer\": \"Marilyn Dutartre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance individuelle de sophrologie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25392428_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770004257\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dombrecht Thibault\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fernandez Mathilde\", \"beneficiary_dob\": null, \"invoice_date\": \"10-08-2021\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Dombrecht Thibault\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-08-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25405311_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770003846\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Amelia CHUNG\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEVILLIER Maxime\", \"beneficiary_dob\": null, \"invoice_date\": \"02-08-2021\", \"security_number\": \"1850595277108\", \"invoice_issuer\": \"Amelia CHUNG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"02-08-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25406038_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130020068\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Charl\\u00e8ne PAROLA\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EL MALEK Eric\", \"beneficiary_dob\": \"28-03-1967\", \"invoice_date\": \"25-08-2021\", \"security_number\": \"167037851506645\", \"invoice_issuer\": \"Charl\\u00e8ne PAROLA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"25-08-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25406488_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25406488_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760003939\", \"rpps_number\": \"10001971158\", \"finess_number\": null, \"doctor_name\": \"MARIE-HELENE DAUGUET\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": 30, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE VISAGE Jean-Yves\", \"beneficiary_dob\": \"07-12-1966\", \"invoice_date\": \"06-09-2021\", \"security_number\": \"1661276498829\", \"invoice_issuer\": \"DOCTEUR MARIE-HELENE DAUGUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale et soins d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-09-2021\", \"mandatory_coverage\": 30, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25414972_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25414972_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"680000536\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUIDANTONI Thibaut\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FERLIN Corinne\", \"insured_dob\": null, \"beneficiary_name\": \"FERLIN Corinne\", \"beneficiary_dob\": null, \"invoice_date\": \"07-09-2021\", \"security_number\": \"273073000704625\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de FERLIN Corinne\", \"quantity\": 1, \"date_of_service\": \"07-09-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25416514_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"270000359\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mme MICHOUX Chrystelle\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FERRARA Elianne\", \"beneficiary_dob\": null, \"invoice_date\": \"02-09-2021\", \"security_number\": \"242078619001208\", \"invoice_issuer\": \"Mme MICHOUX Chrystelle\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Note d' Honoraires\", \"quantity\": null, \"date_of_service\": \"02-09-021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25417095_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25417095_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CANAVATE Marc\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 70.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEGRAND MARTINE\", \"beneficiary_dob\": \"07-03-1957\", \"invoice_date\": \"08-09-2021\", \"security_number\": \"2570344120013\", \"invoice_issuer\": \"MGP\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25417096_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"1534\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CANAVATE Marc\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEGRAND Panture\", \"beneficiary_dob\": null, \"invoice_date\": \"08/09/2021\", \"security_number\": null, \"invoice_issuer\": \"CANAVATE Marc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite\", \"quantity\": null, \"date_of_service\": \"08-09-21\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25421654_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350004701\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Luc ROUAUX\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean-Claude EZAN\", \"beneficiary_dob\": null, \"invoice_date\": \"13-09-2021\", \"security_number\": \"1520535288108\", \"invoice_issuer\": \"Jean-Luc ROUAUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25421753_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130001282\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Fran\\u00e7ois Bouhier\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Robin Alain\", \"beneficiary_dob\": null, \"invoice_date\": \"10-09-2021\", \"security_number\": null, \"invoice_issuer\": \"Jean-Fran\\u00e7ois Bouhier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"10-09-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25426128_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590000121\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bernard BLONDIAU\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marli\\u00e8re Cedric\", \"beneficiary_dob\": null, \"invoice_date\": \"10-09-2021\", \"security_number\": \"177085935031202\", \"invoice_issuer\": \"Bernard BLONDIAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-09-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25446714_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"170000251\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LARRAGNAGUE \\u00c9lisa\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BRUGGISSER GWENDOLYN\", \"beneficiary_dob\": \"21-10-1985\", \"invoice_date\": \"21-09-2021\", \"security_number\": \"285107511232678\", \"invoice_issuer\": \"LARRAGNAGUE \\u00c9lisa\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": 1, \"date_of_service\": \"21-09-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25449739_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"1534\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CANAVATE Marc\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"leceang Mature\", \"beneficiary_dob\": null, \"invoice_date\": \"22-09-2021\", \"security_number\": null, \"invoice_issuer\": \"CANAVATE Marc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Visite\", \"quantity\": 1, \"date_of_service\": \"22-09-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25452722_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810001750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vincent REVEILLON\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTINEZ Fabienne\", \"beneficiary_dob\": \"20-12-1971\", \"invoice_date\": \"06-04-2021\", \"security_number\": \"271121220209033\", \"invoice_issuer\": \"Vincent REVEILLON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-04-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25452723_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25452723_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810001750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vincent REVEILLON\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTINEZ Fabienne\", \"beneficiary_dob\": \"20-12-1971\", \"invoice_date\": \"29-03-2021\", \"security_number\": null, \"invoice_issuer\": \"Vincent REVEILLON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-03-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25452755_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910001353\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florian Hemonic\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Michel Rebillard\", \"beneficiary_dob\": \"14-09-1964\", \"invoice_date\": \"23-09-2021\", \"security_number\": \"164095314758637\", \"invoice_issuer\": \"Florian Hemonic\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"23-09-2021\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25467913_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330005737\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"QUEMENER Erwan\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AVILA SANDRINE\", \"beneficiary_dob\": null, \"invoice_date\": \"19-08-2021\", \"security_number\": \"273082901915584\", \"invoice_issuer\": \"QUEMENER Erwan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Pour un acte d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25489070_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25489070_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060003084\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"louis delhotellerie\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Ludovic\", \"beneficiary_dob\": null, \"invoice_date\": \"28-09-2021\", \"security_number\": null, \"invoice_issuer\": \"louis delhotellerie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathy consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25489071_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25489071_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060003084\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"louis delhotellerie\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Ludovic\", \"beneficiary_dob\": null, \"invoice_date\": \"01-10-2021\", \"security_number\": null, \"invoice_issuer\": \"louis delhotellerie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathy consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25496123_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130001282\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Fran\\u00e7ois Bouhier\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Robin Alain\", \"beneficiary_dob\": null, \"invoice_date\": \"27-09-2021\", \"security_number\": null, \"invoice_issuer\": \"Jean-Fran\\u00e7ois Bouhier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"27-09-2021\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25627904_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770003846\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Am\\u00e9lia CHUNG\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEVILLIER Maxime\", \"beneficiary_dob\": null, \"invoice_date\": \"11-10-2021\", \"security_number\": \"1850595277108\", \"invoice_issuer\": \"Amelia CHUNG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"11-10-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25631712_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400000709\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phane KLUG\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JALLAN OIHANA\", \"beneficiary_dob\": null, \"invoice_date\": \"15-10-2021\", \"security_number\": null, \"invoice_issuer\": \"St\\u00e9phane KLUG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-10-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25632678_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25632678_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300004611\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"perez vincent\", \"total_billed\": 50.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PALBACUER G\\u00e9naldine\", \"beneficiary_dob\": null, \"invoice_date\": \"14-04-2021\", \"security_number\": null, \"invoice_issuer\": \"perez vincent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9o\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25632679_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25632679_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300004611\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"perez vincent\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PALPACUER G\\u00e9naldine\", \"beneficiary_dob\": null, \"invoice_date\": \"26-05-2021\", \"security_number\": null, \"invoice_issuer\": \"perez vincent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-05-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25632680_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25632680_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"307009902\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vincent Perez\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELANZY G\\u00e9raldine\", \"beneficiary_dob\": null, \"invoice_date\": \"03-04-2019\", \"security_number\": null, \"invoice_issuer\": \"Perez Vincent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathe\", \"quantity\": 1, \"date_of_service\": \"03-04-2019\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25792885_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"710000563\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille Bouthenet\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GLORIEUX JEAN-PIERRE\", \"beneficiary_dob\": null, \"invoice_date\": \"25-10-2021\", \"security_number\": \"146053941400116\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25792926_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"020000477\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CLAIRE UBEDA\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SONATORE FRANCOIS\", \"beneficiary_dob\": \"02-08-1978\", \"invoice_date\": \"21-10-2021\", \"security_number\": \"178080240801549\", \"invoice_issuer\": \"CLAIRE UBEDA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-10-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25793641_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"597091032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIN S\\u00e9bastien\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHOMBARD St\\u00e9phane\", \"beneficiary_dob\": null, \"invoice_date\": \"30-08-2021\", \"security_number\": \"176095928601949\", \"invoice_issuer\": \"FLORIN S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-08-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25826742_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"510001878\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Rodolphe DEMANGE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Pierre Mabe\", \"beneficiary_dob\": null, \"invoice_date\": \"04-11-2021\", \"security_number\": \"289025145412147\", \"invoice_issuer\": \"Rodolphe DEMANGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-11-2021\", \"mandatory_coverage\": null, \"amount\": 54.54}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25841816_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25841816_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"21-09-2021\", \"security_number\": \"165035746324356\", \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25842536_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"337043335\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent MARIA\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE BOULCH Marie ne\\u00e9 Rodriguez\", \"beneficiary_dob\": null, \"invoice_date\": \"02-11-2021\", \"security_number\": \"2480233119033\", \"invoice_issuer\": \"Laurent MARIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"02-11-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25842537_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"330010083\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent MARIA\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE BOULCH Marie ne\\u00e9 Rodriguez\", \"beneficiary_dob\": null, \"invoice_date\": \"26-07-2021\", \"security_number\": \"2480233119033\", \"invoice_issuer\": \"Laurent MARIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-07-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25842538_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"337043335\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent MARIA\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOSCH Jean hene\", \"beneficiary_dob\": null, \"invoice_date\": \"02-11-2021\", \"security_number\": \"1511033039010\", \"invoice_issuer\": \"Laurent MARIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"02-11-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25847017_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25847017_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 50, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"21-09-2021\", \"security_number\": \"165035746324356\", \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25848670_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25848670_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"21-09-2021\", \"security_number\": \"165035746324356\", \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25848722_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25848722_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FINET Killian\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"21-09-2021\", \"security_number\": \"165035746324356\", \"invoice_issuer\": \"Centre d'ost\\u00e9opathie de Montigny-l\\u00e8s-Metz\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25849685_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770003846\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Am\\u00e9lia CHUNG\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEVILLIER Bastien\", \"beneficiary_dob\": null, \"invoice_date\": \"03-11-2021\", \"security_number\": \"1850595277108\", \"invoice_issuer\": \"Am\\u00e9lia CHUNG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-11-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25863446_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750014938\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas GRANBOIS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lemoine Jean Yves\", \"beneficiary_dob\": null, \"invoice_date\": \"17-11-2021\", \"security_number\": \"14012611600692\", \"invoice_issuer\": \"Thomas GRANBOIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-11-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25864356_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"337043335\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent MARIA\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE BOULCH Marie ne\\u00e9 Rodriguez\", \"beneficiary_dob\": null, \"invoice_date\": \"02-11-2021\", \"security_number\": \"248023311903370\", \"invoice_issuer\": \"Laurent MARIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"02-11-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25864357_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"337043335\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurent MARIA\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE BOULCH Marie ne\\u00e9 Rodriguez\", \"beneficiary_dob\": null, \"invoice_date\": \"26-07-2021\", \"security_number\": \"2480233119033\", \"invoice_issuer\": \"Laurent MARIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"26-07-2021\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25875089_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750005316\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9cile Ch\\u00e9reau\", \"total_billed\": 95, \"bill_paid\": true, \"amount_paid\": 95, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JACQUET Corine\", \"beneficiary_dob\": null, \"invoice_date\": \"05-10-2021\", \"security_number\": null, \"invoice_issuer\": \"C\\u00e9cile Ch\\u00e9reau\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 95}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25880926_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"70709401C\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe QUENOT\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRANDHAY Jean-Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"19-11-2021\", \"security_number\": null, \"invoice_issuer\": \"Philippe QUENOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance D'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-11-2021\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25885703_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920007200\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fr\\u00e9d\\u00e9ric CANTIEN\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DIBATERE MADY\", \"beneficiary_dob\": null, \"invoice_date\": \"25-11-2021\", \"security_number\": null, \"invoice_issuer\": \"Fr\\u00e9d\\u00e9ric CANTIEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25889716_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060007358\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Chlo\\u00e9 Charpentier\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Nicolino Claire\", \"beneficiary_dob\": null, \"invoice_date\": \"05-11-2021\", \"security_number\": null, \"invoice_issuer\": \"Chlo\\u00e9 Charpentier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathic consultation\", \"quantity\": 1, \"date_of_service\": \"05-11-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25889717_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25889717_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060007358\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Chlo\\u00e9 Charpentier\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Nicolino Claire\", \"beneficiary_dob\": null, \"invoice_date\": \"24-09-2021\", \"security_number\": null, \"invoice_issuer\": \"Chlo\\u00e9 Charpentier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathic consultation\", \"quantity\": 1, \"date_of_service\": \"24-09-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25911689_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220001531\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BERTIER R.\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MERCIER Sandrine\", \"beneficiary_dob\": null, \"invoice_date\": \"05-10-2021\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-10-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25918821_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770008977\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CAGNE Lucienne\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHAGNET Carrie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-12-2021\", \"security_number\": \"299038920622513\", \"invoice_issuer\": \"CAGNE Lucienne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-12-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25919222_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"597091032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIN S\\u00e9bastien\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Aur\\u00e9lie CHOMBART\", \"beneficiary_dob\": null, \"invoice_date\": \"30-10-2021\", \"security_number\": \"284095959904090\", \"invoice_issuer\": \"FLORIN S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-10-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25919223_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"597091032\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIN S\\u00e9bastien\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHOMBART St\\u00e9phane\", \"beneficiary_dob\": null, \"invoice_date\": \"30-10-2021\", \"security_number\": \"17605928601949\", \"invoice_issuer\": \"FLORIN S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-10-2021\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25921103_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001625\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. RICHARD FRONTY\", \"total_billed\": 59.0, \"bill_paid\": true, \"amount_paid\": 59.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. BAPTISTE CANARELLI\", \"beneficiary_dob\": \"24-04-2003\", \"invoice_date\": \"11-12-2021\", \"security_number\": null, \"invoice_issuer\": \"M. RICHARD FRONTY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HN\", \"quantity\": null, \"date_of_service\": \"11-12-2021\", \"mandatory_coverage\": null, \"amount\": 59.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25936311_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770009678\", \"doctor_name\": \"Larret Baptiste\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"insured_name\": null, \"insured_dob\": null, \"invoice_date\": \"15-12-2021\", \"security_number\": \"178080240801549\", \"invoice_issuer\": \"Larret Baptiste\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"amount\": 55}], \"beneficiary_name\": \"SONATORE Fran\\u00e7ois\", \"beneficiary_dob\": \"02-08-1978\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25949353_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300002888\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"REMOUCHE OMAR\", \"total_billed\": 55.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"SALORD REGINE\", \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"21-12-2021\", \"security_number\": \"25307300072853\", \"invoice_issuer\": \"Mr REMOUCHE OMAR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"21-12-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25953786_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"860000124\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LEFEVRE C\\u00e9cile\", \"total_billed\": 52, \"bill_paid\": true, \"amount_paid\": 52, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PERQUIA Michel\", \"beneficiary_dob\": null, \"invoice_date\": \"21-12-2021\", \"security_number\": \"148128619403193\", \"invoice_issuer\": \"LEFEVRE C\\u00e9cile\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-12-2021\", \"mandatory_coverage\": null, \"amount\": 52}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25957679_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350009213\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jenna GUIHEUX\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jean-Yves LEMOINE\", \"beneficiary_dob\": null, \"invoice_date\": \"27-12-2021\", \"security_number\": \"1401261169006\", \"invoice_issuer\": \"Jenna GUIHEUX, ost\\u00e9opathe dipl\\u00f4m\\u00e9e (D.O.)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-12-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25977504_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"860000124\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LEFEVRE C\\u00e9cile\", \"total_billed\": 52, \"bill_paid\": true, \"amount_paid\": 52, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. PERQUIA Michel\", \"beneficiary_dob\": null, \"invoice_date\": \"04-01-2022\", \"security_number\": \"148128619403193\", \"invoice_issuer\": \"LEFEVRE C\\u00e9cile\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-01-2022\", \"mandatory_coverage\": null, \"amount\": 52}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25978266_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910002104\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Val\\u00e9rie KOHL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PICHON Christelle\", \"beneficiary_dob\": null, \"invoice_date\": \"12-11-2021\", \"security_number\": null, \"invoice_issuer\": \"Val\\u00e9rie KOHL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-11-2021\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25979890_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"800000143\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9dric BABAUT\", \"total_billed\": 67, \"bill_paid\": true, \"amount_paid\": 67, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Miraillesse Baunome\", \"beneficiary_dob\": null, \"invoice_date\": \"07-12-2021\", \"security_number\": null, \"invoice_issuer\": \"C\\u00e9dric BABAUT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 67}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-25979892_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": \"10002239555\", \"doctor_name\": \"Dr Pascal HERLEM\", \"total_billed\": 25.0, \"bill_paid\": true, \"amount_paid\": 25.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"insured_name\": null, \"insured_dob\": null, \"invoice_date\": \"20-12-2021\", \"security_number\": \"277011305507284\", \"invoice_issuer\": \"Dr Pascal HERLEM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte d'ost\\u00e9opathie\", \"amount\": 25.0}], \"beneficiary_name\": \"Laurence MIRAILLES\", \"beneficiary_dob\": \"03-01-1977\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26005448_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930006234\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Coralie TEXIER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MME COTE ANAIS\", \"beneficiary_dob\": null, \"invoice_date\": \"03-01-2022\", \"security_number\": \"2961113004163 73\", \"invoice_issuer\": \"Coralie TEXIER Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-01-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26010134_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130007958\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marine Delmas\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ringuet Yannick\", \"beneficiary_dob\": null, \"invoice_date\": \"14-01-2022\", \"security_number\": \"153048708513447\", \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Marine Delmas\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-01-2022\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26018208_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930005533\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexandre Rodzinski\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"08-01-2022\", \"security_number\": null, \"invoice_issuer\": \"Alexandre Rodzinski\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": 1, \"date_of_service\": \"08-01-2022\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26018209_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930005533\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexandre Rodzinski\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"09-12-2021\", \"security_number\": null, \"invoice_issuer\": \"Alexandre Rodzinski\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": 1, \"date_of_service\": \"08-12-2021\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26052732_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330002395\", \"doctor_name\": \"LOTTE Alexandre\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"insured_name\": \"FAURE Nicolas\", \"insured_dob\": \"28-07-1972\", \"invoice_date\": \"12-10-2021\", \"security_number\": \"172071601518922\", \"invoice_issuer\": \"LOTTE Alexandre\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"amount\": 60}], \"beneficiary_name\": \"FAURE Nicolas\", \"beneficiary_dob\": \"28-07-1972\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26063337_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930006234\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Coralie TEXIER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MME COTE ANAIS\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2022\", \"security_number\": \"296111300416373\", \"invoice_issuer\": \"Coralie TEXIER Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26077623_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26077623_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Naturopathe\", \"adeli_number\": \"1000135548\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Daniel MORIN\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"hues Bellor\", \"beneficiary_dob\": null, \"invoice_date\": \"09-02-2022\", \"security_number\": \"1790683137233\", \"invoice_issuer\": \"D. MORIN SAS\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26077624_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26077624_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Naturopathie\", \"adeli_number\": \"1000135548\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Daniel MORIN\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"thun bellin\", \"beneficiary_dob\": null, \"invoice_date\": \"09-02-2022\", \"security_number\": \"1790683137233\", \"invoice_issuer\": \"D. MORIN SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathie\", \"quantity\": null, \"date_of_service\": \"09-02-2022\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26077829_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26077829_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"297020182\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Bertrand LE LANN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NEVEL Roger\", \"beneficiary_dob\": null, \"invoice_date\": \"14-02-2022\", \"security_number\": \"148062918505140\", \"invoice_issuer\": \"Bertrand LE LANN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-02-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26082051_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26082051_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"640008421\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille LABORDE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CORDIER Sarah\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2022\", \"security_number\": \"294193201304786\", \"invoice_issuer\": \"Camille LABORDE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de chiropraxie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26091260_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"2A0000352\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexandre JOUAN\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MERCIER Gilles\", \"beneficiary_dob\": null, \"invoice_date\": \"13-12-2021\", \"security_number\": null, \"invoice_issuer\": \"Alexandre JOUAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26107527_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26107527_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Naturopathie-Etiopathie\", \"adeli_number\": \"1000135548\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Morin\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bellon Lucas\", \"beneficiary_dob\": null, \"invoice_date\": \"10-11-2021\", \"security_number\": \"179068313723367\", \"invoice_issuer\": \"D. MORIN SAS\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26107528_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26107528_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Naturopathie\", \"adeli_number\": \"1000135548\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Morin\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bellon Emma\", \"beneficiary_dob\": null, \"invoice_date\": \"10-11-2021\", \"security_number\": \"179068313723367\", \"invoice_issuer\": \"D. MORIN SAS\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26112050_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870000148\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Olivier BUISSON\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bophole CANARELLI\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"103046303208531\", \"invoice_issuer\": \"Olivier BUISSON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"21-02-2022\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26112640_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"050000207\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Franck MARCHAND\", \"total_billed\": 66, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROMER Richard\", \"beneficiary_dob\": null, \"invoice_date\": \"23-02-2022\", \"security_number\": \"147109924102108\", \"invoice_issuer\": \"Franck MARCHAND\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OST\\u00c9OPATHIE\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26112692_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"260004395\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARRIER Am\\u00e9lie\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SANTACRU Patrice\", \"beneficiary_dob\": null, \"invoice_date\": \"23-02-2022\", \"security_number\": null, \"invoice_issuer\": \"CHARRIER Am\\u00e9lie - Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"23-02-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26141775_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26141775_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440013605\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tristan de La Belli\\u00e8re\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Cohard Odile\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2022\", \"security_number\": null, \"invoice_issuer\": \"Tristan de La Belli\\u00e8re\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"05-03-2022\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26171090_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"650000359\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pascale DULOUT\", \"total_billed\": 50, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEFEUVRE NATHALIE\", \"beneficiary_dob\": null, \"invoice_date\": \"01-03-2022\", \"security_number\": null, \"invoice_issuer\": \"Pascale DULOUT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-03-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26186342_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130010143\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Hiram CASSUTO\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BASTIDA Arnaud\", \"beneficiary_dob\": null, \"invoice_date\": \"10-03-2022\", \"security_number\": \"178069203603002\", \"invoice_issuer\": \"Hiram CASSUTO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-03-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26189648_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"800000143\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9dric BABAUT\", \"total_billed\": 67, \"bill_paid\": true, \"amount_paid\": 67, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mirailles pelorek Laurence\", \"beneficiary_dob\": null, \"invoice_date\": \"30-11-2021\", \"security_number\": \"277011305507284\", \"invoice_issuer\": \"C\\u00e9dric BABAUT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-11-2021\", \"mandatory_coverage\": null, \"amount\": 67}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26189649_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"rpps_number\": \"10002239555\", \"doctor_name\": \"Dr Pascal HERLEM\", \"total_billed\": 25.0, \"bill_paid\": true, \"amount_paid\": 25.0, \"insured_name\": null, \"insured_dob\": null, \"invoice_date\": \"23-03-2022\", \"security_number\": \"277011305507284\", \"invoice_issuer\": \"Dr Pascal HERLEM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte d'ost\\u00e9opathie\", \"amount\": 25.0, \"date_of_service\": \"23-03-2022\"}], \"beneficiary_name\": \"Laurence MIRAILLES\", \"beneficiary_dob\": \"03-01-1977\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26219437_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sage-femme\", \"adeli_number\": \"225800200\", \"rpps_number\": \"10100626307\", \"finess_number\": null, \"doctor_name\": \"Mme Lucie LOISEL\", \"total_billed\": 38, \"bill_paid\": true, \"amount_paid\": 38, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Sandrine MERCIER\", \"beneficiary_dob\": \"11-08-1978\", \"invoice_date\": \"08-12-2021\", \"security_number\": \"2780817300077\", \"invoice_issuer\": \"Mme Lucie LOISEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'acupuncture cot\\u00e9e QZRB001 traitement adjuvant et de deuxi\\u00e8me intention\", \"quantity\": 1, \"date_of_service\": \"08-12-2021\", \"mandatory_coverage\": null, \"amount\": 18}, {\"description\": \"prestation de sant\\u00e9 non remboursable (ACTE HORS NOMENCLATURE HN)\", \"quantity\": 1, \"date_of_service\": \"08-12-2021\", \"mandatory_coverage\": null, \"amount\": 20}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26219439_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"220001333\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PILLOT C.\", \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sandrine MERCIER\", \"beneficiary_dob\": null, \"invoice_date\": \"24-02-2022\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OSTEOPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26222521_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"347007718\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VIDAL Nicolas\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HANON Yves-Gabriel\", \"beneficiary_dob\": \"02-02-1939\", \"invoice_date\": \"01-02-2022\", \"security_number\": \"139027511439390\", \"invoice_issuer\": \"Centre M\\u00e9dical l'Occitanie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-02-2022\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26235835_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Orthop\\u00e9die\", \"adeli_number\": \"132662842\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 177.32, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": 17.32, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROBIN ALAIN\", \"beneficiary_dob\": null, \"invoice_date\": \"14-04-2022\", \"security_number\": \"1 54 08 13 055 810 53\", \"invoice_issuer\": \"ORTHOPEDIE - PODOLOGIE MASQUELIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ORTHESE PLANTAIRE >37\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 177.32}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26236562_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26236562_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640003398\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHABBERT Vincent\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cordier Sarah\", \"beneficiary_dob\": \"09-11-1994\", \"invoice_date\": \"07-04-2022\", \"security_number\": null, \"invoice_issuer\": \"CHABBERT Vincent\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Osteopathic consultation\", \"quantity\": null, \"date_of_service\": \"07-04-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26238691_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130010820\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Charlotte MAGNAN\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTINET Jean - Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2022\", \"security_number\": null, \"invoice_issuer\": \"M\\u00e9dical Center Breteuil\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-04-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26252579_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Radiologie\", \"adeli_number\": \"641716469\", \"rpps_number\": \"10100150852\", \"doctor_name\": \"Dr Marion DEWAILLY\", \"total_billed\": 271.8, \"bill_paid\": true, \"amount_paid\": 67.72, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 66.72, \"insured_name\": null, \"insured_dob\": null, \"invoice_date\": \"09-03-2022\", \"security_number\": \"266026437102217\", \"invoice_issuer\": \"S.A.S. CIMPB - Scanner / IRM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Tarif CCAM\", \"amount\": 55.75}, {\"description\": \"Compl\\u00e9ment d'honoraires\", \"amount\": 50.0}, {\"description\": \"Hors nomenclature\", \"amount\": 0.0}, {\"description\": \"Forfait Technique\", \"amount\": 166.05}], \"beneficiary_name\": \"CASABONNE SYLVIE\", \"beneficiary_dob\": \"05-02-1966\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26255763_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26255763_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759335045\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ga\\u00eblle THOMAS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Viviane GIRODON\", \"beneficiary_dob\": null, \"invoice_date\": \"29-03-2022\", \"security_number\": \"268069203203077\", \"invoice_issuer\": \"Ga\\u00eblle THOMAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"29-03-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759335045\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ga\\u00eblle THOMAS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Viviane GIRODON\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2022\", \"security_number\": \"268069203203077\", \"invoice_issuer\": \"Ga\\u00eblle THOMAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-04-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26268049_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"21 8 04268 7\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Guillaume CAUFFRIER\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Karine TAVERNIER\", \"beneficiary_dob\": null, \"invoice_date\": \"28-04-2022\", \"security_number\": \"274047115309320\", \"invoice_issuer\": \"Cabinet de Podologie du Sport et de Posturologie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BILAN PODOLOGIQUE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.0}, {\"description\": \"PAIRE DE SEMELLES ORTHOPEDIQUES\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 113.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26272878_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"510000151\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marc-Antoine Kosmala\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Golovkins Francis\", \"beneficiary_dob\": null, \"invoice_date\": \"28-04-2022\", \"security_number\": \"157055110809393\", \"invoice_issuer\": \"Marc-Antoine Kosmala\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-04-2022\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26298310_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750001232\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Charlotte DARCET\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRIALOU FREDERIC\", \"beneficiary_dob\": null, \"invoice_date\": \"14-04-2022\", \"security_number\": \"1770312202122\", \"invoice_issuer\": \"Charlotte DARCET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s) d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": \"14-04-2022\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26298895_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Soins hospitalier\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 90, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 90, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"06-04-2022\", \"security_number\": null, \"invoice_issuer\": \"SCP des Anesth\\u00e9sistes R\\u00e9animateurs d'Essey-L\\u00e8s-Nancy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Compl\\u00e9ment d'honoraires pour anesth\\u00e9sie\", \"quantity\": null, \"date_of_service\": \"13-05-2022\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26300440_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"911082295\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr P. ROUGEE\", \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTIAL JAFFRE\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Dr C. ROUGEE\", \"currency\": null, \"items\": [{\"description\": \"uroscanner\", \"quantity\": null, \"date_of_service\": \"14-05-2022\", \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26300731_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Psychologue\", \"doctor_name\": \"Docteur Arlette RENARD\", \"invoice_issuer\": \"Docteur Arlette RENARD\", \"invoice_date\": \"15-04-2022\", \"beneficiary_name\": \"Madame Marie Claude Beaujard\", \"currency\": \"EUR\", \"items\": [{\"description\": \"IBM poignet droit\", \"date_of_service\": \"15-04-2022\"}, {\"description\": \"Paresth\\u00e9sies et manque force territoire cubital\"}, {\"description\": \"Pas d'argument \\u00e0 l'IRM pour l'atteinte cervicale d\\u00e9tect\\u00e9e sur EMG 2021\"}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26328720_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26328720_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300004223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr Patrice SAPHY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fanny MOULARD\", \"beneficiary_dob\": null, \"invoice_date\": \"14-01-2022\", \"security_number\": null, \"invoice_issuer\": \"Mr Patrice SAPHY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soin d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"14-01-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26328721_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300004223\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr Patrice SAPHY\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fanny MOULARD\", \"beneficiary_dob\": null, \"invoice_date\": \"31-03-2022\", \"security_number\": null, \"invoice_issuer\": \"Mr Patrice SAPHY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soin d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"31-03-2022\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26341092_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26341092_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"637803982\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHAZOT EMMANUELLE\", \"total_billed\": 109.68, \"bill_paid\": true, \"amount_paid\": 43.86, \"mandatory_coverage\": 65.82, \"complementary_coverage\": null, \"client_part\": 43.86, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FOURMANN ANNA\", \"beneficiary_dob\": \"01-05-1931\", \"invoice_date\": \"01-03-2022\", \"security_number\": \"2310557377001\", \"invoice_issuer\": \"CHAZOT EMMANUELLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"6 AMK8.5\", \"quantity\": 6, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26347233_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440013605\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tristan de La Belli\\u00e8re\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme Caraud Odile\", \"beneficiary_dob\": null, \"invoice_date\": \"19-03-2022\", \"security_number\": null, \"invoice_issuer\": \"Tristan de La Belli\\u00e8re\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"19-03-2022\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26349654_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26349654_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210000048\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SPINNEWEBER Mathieu\", \"total_billed\": 62.0, \"bill_paid\": true, \"amount_paid\": 62.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAISSUS Sophie\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2022\", \"security_number\": \"178071003301608\", \"invoice_issuer\": \"SPINNEWEBER Mathieu\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 62.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26349759_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690012745\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mme MUGNIER Maro\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRANZOTTO Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"27-04-2022\", \"security_number\": \"176039519903820\", \"invoice_issuer\": \"Ost\\u00e9o Lyon Sud\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-04-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26360535_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"768700023\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr Eric HELD\", \"total_billed\": 140, \"bill_paid\": true, \"amount_paid\": 140, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIVERA Jade\", \"beneficiary_dob\": null, \"invoice_date\": \"20-04-2022\", \"security_number\": \"267602456000\", \"invoice_issuer\": \"Centre de podologie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Paire d'Orth\\u00e8ses Plantaires Thermoform\\u00e9es\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 140}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26368529_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26368529_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Pompes fun\\u00e8bres\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 2744.64, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": 2744.64, \"insured_name\": null, \"beneficiary_name\": \"DANIEL HENRI\", \"beneficiary_dob\": \"06-09-1932\", \"invoice_date\": \"25-02-2022\", \"security_number\": null, \"invoice_issuer\": \"POMPES FUNEBRES Serge LE MADEC\", \"currency\": \"EUR\", \"items\": [], \"insured_dob\": null, \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Pompes fun\\u00e8bres\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 670.83, \"bill_paid\": true, \"amount_paid\": 670.83, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"DANIEL HENRI\", \"beneficiary_dob\": null, \"invoice_date\": \"23-02-2022\", \"security_number\": null, \"invoice_issuer\": \"P\\u00f4le fun\\u00e9raire\", \"currency\": \"EUR\", \"items\": [], \"insured_dob\": null, \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26398662_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"400000709\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phane KLUG\", \"total_billed\": 57, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CASABONNE Sylvie\", \"beneficiary_dob\": null, \"invoice_date\": \"10-06-2022\", \"security_number\": null, \"invoice_issuer\": \"St\\u00e9phane KLUG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 57}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26399860_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810001123\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9cile Cabanes\", \"total_billed\": 53, \"bill_paid\": true, \"amount_paid\": 53, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Stephane PECOULT\", \"beneficiary_dob\": null, \"invoice_date\": \"16-06-2022\", \"security_number\": \"179077400805212\", \"invoice_issuer\": \"C\\u00e9cile Cabanes\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"16-06-2022\", \"mandatory_coverage\": null, \"amount\": 53}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26401996_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"817008865\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TREZEGUET DAMIEN\", \"total_billed\": 205.56, \"bill_paid\": true, \"amount_paid\": 82.2, \"mandatory_coverage\": 123.36, \"complementary_coverage\": 0.0, \"client_part\": 82.2, \"remaining_payment\": 0, \"insured_name\": \"MARTINEZ FABIENNE\", \"insured_dob\": \"20-12-1971\", \"beneficiary_name\": \"MARTINEZ FABIENNE\", \"beneficiary_dob\": \"20-12-1971\", \"invoice_date\": \"25-05-2022\", \"security_number\": \"2711212202090\", \"invoice_issuer\": \"TREZEGUET DAMIEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"12 AMS7.5\", \"quantity\": 12, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"3 IFS\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26426650_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26426650_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"290002435\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CORRE Elodie\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RUNAVOT Jacques\", \"beneficiary_dob\": \"27-05-1947\", \"invoice_date\": \"30-06-2022\", \"security_number\": \"147052920901568\", \"invoice_issuer\": \"El CORRE Elodie ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-06-2022\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26441046_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"rpps_number\": \"10101564341\", \"doctor_name\": \"Magali BERTEAUD\", \"total_billed\": 36, \"bill_paid\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Matina sabine\", \"beneficiary_dob\": null, \"invoice_date\": \"24-05-2022\", \"invoice_issuer\": \"Magali BERTEAUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation de p\\u00e9dicurie podologie\", \"quantity\": 1, \"amount\": 36}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26447822_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. Philippe QUENOT\", \"total_billed\": null, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRANDHAY Jean-Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"10-07-2022\", \"security_number\": \"1470869268005\", \"invoice_issuer\": \"M. Philippe QUENOT\", \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26447823_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"70709401C\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe QUENOT\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRANDHAYE Jean-Claude\", \"beneficiary_dob\": null, \"invoice_date\": \"21-06-2022\", \"security_number\": null, \"invoice_issuer\": \"Philippe QUENOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance D'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-06-2022\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26447833_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"708000153\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mme LANDIOT-COLLARDEY Emilie\", \"total_billed\": 38.0, \"bill_paid\": true, \"amount_paid\": 38.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Grandhay Marie-Pierre\", \"insured_dob\": null, \"beneficiary_name\": \"Mme Grandhay Marie-Pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"24-06-2022\", \"security_number\": null, \"invoice_issuer\": \"Mme LANDIOT-COLLARDEY Emilie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soin de p\\u00e9dicurie\", \"quantity\": 1, \"date_of_service\": \"24-06-2022\", \"mandatory_coverage\": null, \"amount\": 38.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26455411_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Richard ALBERTI\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Ludovic\", \"beneficiary_dob\": null, \"invoice_date\": \"09-06-2022\", \"security_number\": null, \"invoice_issuer\": \"Richard ALBERTI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"09-06-2022\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26455412_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060003084\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DELHOTELLERIE louis\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MANTEUFEL Ludovic\", \"beneficiary_dob\": null, \"invoice_date\": \"07-06-2022\", \"security_number\": null, \"invoice_issuer\": \"DELHOTELLERIE louis\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"07-06-2022\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26469442_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"139318208\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laetitia BUIS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie LURAC\", \"beneficiary_dob\": null, \"invoice_date\": \"19-07-2022\", \"security_number\": \"1750793081080\", \"invoice_issuer\": \"Laetitia BUIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances psychologiques\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26469508_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26469508_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"570000976\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexis CORTINA\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"15-07-2022\", \"security_number\": null, \"invoice_issuer\": \"Cabinet chiropratique Pil\\u00e2tre de Rozier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi Chiropratique\", \"quantity\": 1, \"date_of_service\": \"15-07-2022\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"570000976\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexis CORTINA\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M GIUDICI Eric\", \"beneficiary_dob\": \"18-03-1965\", \"invoice_date\": \"28-06-2022\", \"security_number\": null, \"invoice_issuer\": \"Cabinet chiropratique Pil\\u00e2tre de Rozier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi Chiropratique\", \"quantity\": 1, \"date_of_service\": \"28-06-2022\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26476811_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"727004301\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thierry Pouget\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LOUTREUX Jessica\", \"beneficiary_dob\": \"12-05-1981\", \"invoice_date\": \"20-07-2022\", \"security_number\": \"281055701202306\", \"invoice_issuer\": \"Thierry Pouget\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"20-07-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26479073_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26479073_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Biologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"953702016\", \"doctor_name\": \"DR CARMI ANTOINE\", \"total_billed\": 20.0, \"bill_paid\": true, \"amount_paid\": 20.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 20.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAGELLAN RENE\", \"beneficiary_dob\": \"12-11-1953\", \"invoice_date\": \"17-03-2022\", \"security_number\": \"1531197214037 93\", \"invoice_issuer\": \"Laboratoire de Biologie M\\u00e9dicale Gaetan ESPOSITO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Pr\\u00e9l + D\\u00e9placements\", \"quantity\": null, \"date_of_service\": \"17-03-2022\", \"mandatory_coverage\": null, \"amount\": 20.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26483179_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26483179_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690012588\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COULOT Sacha\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GRANDHAY Marie-Pierre\", \"beneficiary_dob\": \"02-09-1952\", \"invoice_date\": \"22-07-2022\", \"security_number\": \"2520925388423\", \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie COULOT Sacha\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation adulte\", \"quantity\": 1, \"date_of_service\": \"22-07-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26500702_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750012916\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Romain MARGUERIE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Trihan Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2022\", \"security_number\": \"179095314704438\", \"invoice_issuer\": \"Romain MARGUERIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-04-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26505719_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810001123\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9cile Cabanes\", \"total_billed\": 53, \"bill_paid\": true, \"amount_paid\": 53, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"St\\u00e9phane PECOULT\", \"beneficiary_dob\": null, \"invoice_date\": \"29-07-2022\", \"security_number\": null, \"invoice_issuer\": \"C\\u00e9cile Cabanes\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"29-07-2022\", \"mandatory_coverage\": null, \"amount\": 53}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26538986_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330002296\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BENOIT LAVILLE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEGROGNEC Anne Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"08-08-2022\", \"security_number\": null, \"invoice_issuer\": \"BENOIT LAVILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"NOTE D'HONORAIRES ACQUITEE\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26539711_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310003173\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elodie ELCRIN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAVES Patrick\", \"beneficiary_dob\": null, \"invoice_date\": \"11-08-2022\", \"security_number\": \"162123155547891\", \"invoice_issuer\": \"Elodie ELCRIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26539712_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26539712_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310003173\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elodie ELCRIN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAVES Patrick\", \"beneficiary_dob\": null, \"invoice_date\": \"17-02-2022\", \"security_number\": \"162123155547891\", \"invoice_issuer\": \"Elodie ELCRIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26548441_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770009470\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MARINO Julien\", \"total_billed\": 52, \"bill_paid\": true, \"amount_paid\": 52, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUYON Sylvain\", \"beneficiary_dob\": \"25-04-1979\", \"invoice_date\": \"27-08-2022\", \"security_number\": null, \"invoice_issuer\": \"EI CABINET D'OSTEOPATHIE HELENE CHESNEAU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 52}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26555547_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26555547_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"321006694\", \"rpps_number\": \"10100968964\", \"finess_number\": null, \"doctor_name\": \"Yann LE DEAN\", \"total_billed\": 220.65, \"bill_paid\": true, \"amount_paid\": 118.7, \"mandatory_coverage\": 101.95, \"complementary_coverage\": null, \"client_part\": 118.7, \"remaining_payment\": null, \"insured_name\": \"Pierre ZANCANARO\", \"insured_dob\": \"14-03-1934\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"06-09-2022\", \"security_number\": \"134033214700141\", \"invoice_issuer\": \"Docteur Yann LE DEAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ARTHRUM 75 CHEVILLE DROITE 1 INJECTION\", \"quantity\": 1, \"date_of_service\": \"06-09-2022\", \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26555594_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"340001056\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Olivier PONSY\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"THEVENOT Claudine\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2022\", \"security_number\": \"2520418279005\", \"invoice_issuer\": \"Olivier PONSY POL-OSTEO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-08-2022\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26556186_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130019516\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ABONNEL Nils\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LABORDE Daniele\", \"beneficiary_dob\": null, \"invoice_date\": \"30-08-2022\", \"security_number\": \"2541111069031\", \"invoice_issuer\": \"ABONNEL Nils\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-08-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26556388_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26556388_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"977408202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean Philippe LANLY\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": \"Limbada Mohammed Taahir\", \"insured_dob\": null, \"beneficiary_name\": \"Limbada Mohammed Taahir\", \"beneficiary_dob\": null, \"invoice_date\": \"05-08-2022\", \"security_number\": \"105127521494711\", \"invoice_issuer\": \"Monsieur Jean Philippe LANLY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-08-2022\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"977408202\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean Philippe LANLY\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": \"Limbada Mohammed Taahir\", \"insured_dob\": null, \"beneficiary_name\": \"Limbada Mohammed Taahir\", \"beneficiary_dob\": null, \"invoice_date\": \"19-07-2022\", \"security_number\": \"105127521494711\", \"invoice_issuer\": \"Monsieur Jean Philippe LANLY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"19-07-2022\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26559059_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26559059_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870001369\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHARPENTIER Thierry\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOUAULT. STEPHAN\", \"beneficiary_dob\": null, \"invoice_date\": \"22-08-2022\", \"security_number\": null, \"invoice_issuer\": \"CHARPENTIER Thierry\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26574993_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950003863\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ROBINEAU Jessica\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lucie Moreira\", \"beneficiary_dob\": \"01-06-1979\", \"invoice_date\": \"23-08-2022\", \"security_number\": null, \"invoice_issuer\": \"Cabinet param\\u00e9dical\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"23-08-2022\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26585673_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770006740\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Charl\\u00e8ne GALBRUN D.C\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dupuis Edmonde\", \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2022\", \"security_number\": \"1320789350001\", \"invoice_issuer\": \"Charl\\u00e8ne GALBRUN D.C\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"14-09-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26585743_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26585743_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"541026894\", \"rpps_number\": \"10002340783\", \"finess_number\": null, \"doctor_name\": \"Docteur Denis EVRARD\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 36.5, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Martine CATTANI\", \"beneficiary_dob\": null, \"invoice_date\": \"14-09-2022\", \"security_number\": \"2 54 09 54 329 476 - 47\", \"invoice_issuer\": \"Docteur Denis EVRARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte de m\\u00e9decine manuelle de la colonne vert\\u00e9brale (LHRP001+X)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 33.5, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26592477_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26592477_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"047700208\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. Patrice MICHEL\", \"total_billed\": 84, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 84, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAXIMIN Jean pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"08-09-2022\", \"security_number\": null, \"invoice_issuer\": \"M. Patrice MICHEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance(s) d'onde de choc(s) dont 14\\u20ac de d\\u00e9passement\", \"quantity\": 6, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 84}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26592510_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"090000142\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nicolas Commecy\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUCONNIER Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"25-07-2022\", \"security_number\": \"158085903619888\", \"invoice_issuer\": \"Nicolas Commecy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-07-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26592511_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": \"10000410984\", \"finess_number\": null, \"doctor_name\": \"Besnard Jean-Michel\", \"total_billed\": 95, \"bill_paid\": true, \"amount_paid\": 95, \"mandatory_coverage\": 25, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Greco Christine\", \"beneficiary_dob\": \"16-04-1960\", \"invoice_date\": \"01-08-2022\", \"security_number\": \"2600475051209\", \"invoice_issuer\": \"Docteur Besnard Jean-Michel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale\", \"quantity\": 1, \"date_of_service\": \"01-08-2022\", \"mandatory_coverage\": 25, \"amount\": 25}, {\"description\": \"soins d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-08-2022\", \"mandatory_coverage\": 0, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26595219_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26595219_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330001785\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DOAT MATHIEU\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIRAUD JOCELYNE\", \"beneficiary_dob\": \"08-02-1946\", \"invoice_date\": \"20-09-2022\", \"security_number\": \"246020240800823\", \"invoice_issuer\": \"DOAT MATHIEU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Note d'honoraires ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": \"20-09-2022\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26605349_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130019516\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ABONNEL Nils\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laborde Daniele\", \"beneficiary_dob\": null, \"invoice_date\": \"27-09-2022\", \"security_number\": \"2541111069031\", \"invoice_issuer\": \"ABONNEL Nils\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": null, \"date_of_service\": \"27-09-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26611054_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"250000338\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexandre PATIRAS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GANNARD Marie-Th\\u00e9r\\u00e8se\", \"beneficiary_dob\": \"01-02-1946\", \"invoice_date\": \"03-09-2022\", \"security_number\": \"246022521100280\", \"invoice_issuer\": \"Alexandre PATIRAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-09-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26613355_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26613355_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"797003266\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fran\\u00e7ois DANDO\", \"total_billed\": 140, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": \"M. DEMAY Jacques\", \"insured_dob\": null, \"beneficiary_name\": \"M. DEMAY Jacques\", \"beneficiary_dob\": null, \"invoice_date\": \"25-02-2022\", \"security_number\": \"1471279191067\", \"invoice_issuer\": \"Fran\\u00e7ois DANDO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 2, \"date_of_service\": \"25-02-2022\", \"mandatory_coverage\": null, \"amount\": 140}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"790001192\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Photine Carlevan\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"out_of_pocket\": null, \"remaining_payment\": null, \"insured_name\": \"M. DEMAY Jacques\", \"insured_dob\": null, \"beneficiary_name\": \"M. DEMAY Jacques\", \"beneficiary_dob\": null, \"invoice_date\": \"13-09-2021\", \"security_number\": \"1471279191067\", \"invoice_issuer\": \"Photine Carlevan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-09-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26626549_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26626549_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"M\\u00e9decine G\\u00e9n\\u00e9rale\", \"adeli_number\": \"171004146\", \"rpps_number\": \"10100790970\", \"doctor_name\": \"Dr Julien DELAIRE\", \"total_billed\": 25, \"bill_paid\": true, \"amount_paid\": 25, \"insured_name\": null, \"insured_dob\": null, \"security_number\": \"275121730013911\", \"beneficiary_name\": \"GUERIT Maryline\", \"beneficiary_dob\": \"23-12-1978\", \"invoice_date\": \"22-09-2022\", \"invoice_issuer\": \"Dr Julien DELAIRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte Hors Nomenclature de M\\u00e9soth\\u00e9rapie \\u00e0 vis\\u00e9e antalgique Code ANLB003 (soin de m\\u00e9soth\\u00e9rapie non remboursable par la CPAM mais pouvant l'\\u00eatre par une assurance compl\\u00e9mentaire selon le contrat personnel d'assurance du patient)\", \"quantity\": 1, \"date_of_service\": \"22-09-2022\", \"amount\": 25}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26626551_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210000048\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SPINNEWEBER Mathieu\", \"total_billed\": 62.0, \"bill_paid\": true, \"amount_paid\": 62.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"Monsieur LASSUS NICOLAS\", \"beneficiary_dob\": null, \"invoice_date\": \"06-09-2022\", \"security_number\": \"178071003301608\", \"invoice_issuer\": \"SPINNEWEBER Mathieu\", \"currency\": \"EUR\", \"items\": [{\"description\": \"et re\\u00e7ue ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": \"06-07-2021\", \"mandatory_coverage\": null, \"amount\": 62.0}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26626836_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26626836_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"927019521\", \"rpps_number\": \"10005695381\", \"doctor_name\": \"Donn\\u00e9 Cyril\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"beneficiary_name\": \"MME Elgherbi Blandine\", \"invoice_date\": \"02-08-2022\", \"invoice_issuer\": \"Donn\\u00e9 Cyril\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"amount\": 50, \"date_of_service\": \"02-08-2022\"}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"927019521\", \"rpps_number\": \"10005695381\", \"doctor_name\": \"Donn\\u00e9 Cyril\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"beneficiary_name\": \"MME Elgherbi Blandine\", \"invoice_date\": \"01-09-2022\", \"invoice_issuer\": \"Donn\\u00e9 Cyril\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"amount\": 50, \"date_of_service\": \"01-09-2022\"}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26627281_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26627281_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Radiologie\", \"adeli_number\": null, \"rpps_number\": \"10100521441\", \"finess_number\": null, \"doctor_name\": \"Docteur HAMMOUD SONIA\", \"total_billed\": 77.8, \"bill_paid\": true, \"amount_paid\": 51.34, \"mandatory_coverage\": 37.8, \"complementary_coverage\": null, \"client_part\": 51.34, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MME DUPUIS EDMONDE\", \"beneficiary_dob\": \"08-08-1943\", \"invoice_date\": \"06-10-2022\", \"security_number\": \"132078935000143\", \"invoice_issuer\": \"POLE DE SANTE LA FRANCILIENNE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PCQM001\", \"quantity\": null, \"date_of_service\": \"06-10-2022\", \"mandatory_coverage\": 37.8, \"amount\": 77.8}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Radiologie\", \"adeli_number\": null, \"rpps_number\": \"1003748661\", \"finess_number\": null, \"doctor_name\": \"Docteur YEH Jian Fang\", \"total_billed\": 59.52, \"bill_paid\": true, \"amount_paid\": 34.66, \"mandatory_coverage\": 24, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MME DUPUIS EDMONDE\", \"beneficiary_dob\": \"08-08-1943\", \"invoice_date\": \"06-10-2022\", \"security_number\": \"132078935000143\", \"invoice_issuer\": \"POLE DE SANTE LA FRANCILIENNE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"MAQK001\", \"quantity\": null, \"date_of_service\": \"06-10-2022\", \"mandatory_coverage\": 35.52, \"amount\": 59.52}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26660350_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"541026894\", \"rpps_number\": \"10002340783\", \"finess_number\": null, \"doctor_name\": \"Docteur Denis EVRARD\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 36.5, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Martine CATTANI\", \"beneficiary_dob\": null, \"invoice_date\": \"19-10-2022\", \"security_number\": \"254095432947647\", \"invoice_issuer\": \"Docteur Denis EVRARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"compl\\u00e9ment ost\\u00e9opathie m\\u00e9dicale d'un acte de m\\u00e9decine manuelle de la colonne vert\\u00e9brale (LHRP001+X)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26660518_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26660518_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Radiologie\", \"adeli_number\": \"941156929\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NGO ANTOINE\", \"total_billed\": 101.75, \"bill_paid\": true, \"amount_paid\": 48.03, \"mandatory_coverage\": 76.75, \"complementary_coverage\": null, \"client_part\": 48.03, \"remaining_payment\": null, \"insured_name\": \"Mme Jeanne TAVERNY\", \"insured_dob\": \"09-08-1936\", \"beneficiary_name\": \"Mme Jeanne TAVERNY\", \"beneficiary_dob\": \"09-08-1936\", \"invoice_date\": \"11-10-2022\", \"security_number\": \"236089721580035\", \"invoice_issuer\": \"CENTRE D'IMAGERIE MEDICALE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": null, \"date_of_service\": \"11-10-2022\", \"mandatory_coverage\": null, \"amount\": 101.75}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Radiologie\", \"adeli_number\": \"940020357\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Henri Dret\", \"total_billed\": 25, \"bill_paid\": true, \"amount_paid\": 25, \"mandatory_coverage\": 17.5, \"complementary_coverage\": null, \"client_part\": 7.5, \"remaining_payment\": null, \"insured_name\": \"Mme Jeanne TAVERNY\", \"insured_dob\": \"09-08-1936\", \"beneficiary_name\": \"Mme Jeanne TAVERNY\", \"beneficiary_dob\": \"09-08-1936\", \"invoice_date\": \"07-10-2022\", \"security_number\": \"236089721580035\", \"invoice_issuer\": \"CMS Henri Dret Cabinet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": null, \"date_of_service\": \"07-10-2022\", \"mandatory_coverage\": null, \"amount\": 25}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26675471_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"388002073\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RAMEL Fabienne\", \"total_billed\": 37.0, \"bill_paid\": true, \"amount_paid\": 37.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Philippe BOUTEILLE\", \"beneficiary_dob\": \"06-03-1957\", \"invoice_date\": \"10-10-2022\", \"security_number\": null, \"invoice_issuer\": \"RAMEL Fabienne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOINS DE PEDICURIE\", \"quantity\": null, \"date_of_service\": \"10-10-2022\", \"mandatory_coverage\": null, \"amount\": 37.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26681219_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920008075\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien DESLOOVERE\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dehoche Antial\", \"beneficiary_dob\": null, \"invoice_date\": \"28-10-2022\", \"security_number\": null, \"invoice_issuer\": \"Julien DESLOOVERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": null, \"date_of_service\": \"28-10-2022\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26684295_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"350007498\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audrey ALIX - LE GUILLOU\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DALMAS Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"27-10-2022\", \"security_number\": null, \"invoice_issuer\": \"Audrey ALIX - LE GUILLOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-10-2022\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26685696_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"297022386\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BELLEC Christophe\", \"total_billed\": 52, \"bill_paid\": true, \"amount_paid\": 52, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MEZIN St\\u00e9phane\", \"beneficiary_dob\": null, \"invoice_date\": \"24-10-2022\", \"security_number\": null, \"invoice_issuer\": \"BELLEC Christophe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"24-10-2022\", \"mandatory_coverage\": null, \"amount\": 52}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26692990_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590000675\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OBLIN Aur\\u00e9lie\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Florent Christian\", \"beneficiary_dob\": null, \"invoice_date\": \"02-11-2022\", \"security_number\": null, \"invoice_issuer\": \"OBLIN Aur\\u00e9lie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"02-11-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26694994_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060011319\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Garrone St\\u00e9phane\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DREVONT Patrick\", \"beneficiary_dob\": \"25-04-1953\", \"invoice_date\": \"03-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie Garrone St\\u00e9phane\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26699212_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920008075\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien DESLOOVERE\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dehacty Anting\", \"beneficiary_dob\": null, \"invoice_date\": \"28-10-2022\", \"security_number\": null, \"invoice_issuer\": \"Julien DESLOOVERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": null, \"date_of_service\": \"28-10-2022\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26699279_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920008075\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julien DESLOOVERE\", \"total_billed\": 60.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Dehoche Antial\", \"beneficiary_dob\": null, \"invoice_date\": \"28-10-2022\", \"security_number\": null, \"invoice_issuer\": \"Julien DESLOOVERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": null, \"date_of_service\": \"28-10-2022\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26702207_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440001519\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lo\\u00efc Gatinel\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jamet LUDOVIC\", \"beneficiary_dob\": null, \"invoice_date\": \"03-11-2022\", \"security_number\": \"1730422050015 51\", \"invoice_issuer\": \"Gatinel Lo\\u00efc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"03-11-2022\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26711446_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760001750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RENAULT Mathilde\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PETIT JULIE\", \"beneficiary_dob\": \"27-01-1984\", \"invoice_date\": \"27-10-2022\", \"security_number\": null, \"invoice_issuer\": \"RENAULT Mathilde\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26728037_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26728037_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"810000315\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ALIAGA REMY\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LORTAL Alexandra\", \"beneficiary_dob\": null, \"invoice_date\": \"06-10-2022\", \"security_number\": null, \"invoice_issuer\": \"ALIAGA REMY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de LORTAL Alexandra le 06/10/2022\", \"quantity\": 1, \"date_of_service\": \"06-10-2022\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26731488_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laure RENAULT\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lola MAUFROIS\", \"beneficiary_dob\": null, \"invoice_date\": \"15-09-2022\", \"security_number\": null, \"invoice_issuer\": \"Laure RENAULT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1\\u00e8re S\\u00e9ance individuelle de sophrologie en cabinet\", \"quantity\": 1, \"date_of_service\": \"15-09-2022\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26738638_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330002296\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BENOIT LAVILLE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. LE GROGNEC Guy\", \"beneficiary_dob\": null, \"invoice_date\": \"14-10-2022\", \"security_number\": null, \"invoice_issuer\": \"BENOIT LAVILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"NOTE D'HONORAIRES\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26739265_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"178000857\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ma\\u00e9va Poulain\", \"total_billed\": 30, \"bill_paid\": true, \"amount_paid\": 30, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PETIT Julie\", \"beneficiary_dob\": null, \"invoice_date\": \"01-08-2022\", \"security_number\": \"284017634111884\", \"invoice_issuer\": \"Ma\\u00e9va Poulain\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de podologie p\\u00e9dicurie\", \"quantity\": 1, \"date_of_service\": \"01-08-2022\", \"mandatory_coverage\": null, \"amount\": 30}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26741247_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9lo\\u00efse ALLAIN\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PALPACUER Geraldine\", \"beneficiary_dob\": null, \"invoice_date\": \"17-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Centre de Micropraxie et Microkin\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte de Micropraxie et Microkin\\u00e9sith\\u00e9rapie, acte hors nomenclature\", \"quantity\": 1, \"date_of_service\": \"17-11-2022\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26744045_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"550000224\", \"doctor_name\": \"SICOT Micka\\u00ebl\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"insured_name\": null, \"beneficiary_name\": \"VAILLANT Am\\u00e9lie\", \"invoice_date\": \"18-11-2022\", \"security_number\": \"1841155545089\", \"invoice_issuer\": \"SICOT Micka\\u00ebl\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"date_of_service\": \"18-11-2022\", \"amount\": 55}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26750142_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750003998\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lie SELLAM AVITAN\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHENARD Yvette\", \"beneficiary_dob\": null, \"invoice_date\": \"26-10-2022\", \"security_number\": \"251049935364646\", \"invoice_issuer\": \"E.I. Aur\\u00e9lie SELLAM AVITAN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations d'Ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": \"26-10-2022\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26754469_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26754469_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"2B8000550\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PERY Damien\", \"total_billed\": 20, \"bill_paid\": true, \"amount_paid\": 20, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SIMONIN\", \"beneficiary_dob\": null, \"invoice_date\": \"16-11-2022\", \"security_number\": \"274069741634337\", \"invoice_issuer\": \"PERY Damien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"16-11-2022\", \"mandatory_coverage\": null, \"amount\": 20}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26760649_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760001750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RENAULT Mathilde\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"RIVERA FRANCOIS\", \"beneficiary_dob\": \"28-09-1979\", \"invoice_date\": \"03-11-2022\", \"security_number\": \"267602456000\", \"invoice_issuer\": \"RENAULT Mathilde\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26761246_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940001209\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Axel Renard\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHETARD Denis\", \"beneficiary_dob\": null, \"invoice_date\": \"16-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Axel Renard\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins ost\\u00e9opathiques\", \"quantity\": null, \"date_of_service\": \"16-11-2022\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26761886_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780012944\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laura SAVOUR\\u00c9\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HAUTEFEUILLE Elodie\", \"beneficiary_dob\": null, \"invoice_date\": \"24-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Laura SAVOUR\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"24-11-2022\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26764181_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"336172895\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Romain Provost\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Founeau Audrey\", \"beneficiary_dob\": null, \"invoice_date\": \"22-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Romain Provost\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-11-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26766223_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760001750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RENAULT Mathilde\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"RIVERA FRANCOIS\", \"beneficiary_dob\": \"28-09-1979\", \"invoice_date\": \"24-11-2022\", \"security_number\": null, \"invoice_issuer\": \"RENAULT Mathilde\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26766261_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 30.0, \"bill_paid\": true, \"amount_paid\": 30.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2024\", \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": null, \"quantity\": 1, \"date_of_service\": \"27-01-2024\", \"mandatory_coverage\": null, \"amount\": 30.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26766352_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"502647\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vaul\\u00e9on Christophe\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"HELLIO Guy\", \"beneficiary_dob\": \"07-05-1948\", \"invoice_date\": \"17-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Vaul\\u00e9on Christophe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26771426_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"338032345\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Poulain Bernard\", \"total_billed\": 130, \"bill_paid\": true, \"amount_paid\": 130, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROBIN Jo\\u00eblle\", \"beneficiary_dob\": null, \"invoice_date\": \"18-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Justine MARQUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 PAIRE D'ORTHESES PLANTAIRES CONFORME AU CAHIER DES CHARGES REFERENCES LPP : 2,14 04 55 x2\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 130}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26772586_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060012002\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GERMIER Lo\\u00efc\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Balthazar Vincent\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"184129542803583\", \"invoice_issuer\": \"GERMIER Lo\\u00efc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"19-07-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26774954_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770000511\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Philippe EYMARD\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Monsieur FOURMAULT RENE\", \"beneficiary_dob\": null, \"invoice_date\": \"23-12-2021\", \"security_number\": \"1521162284027\", \"invoice_issuer\": \"Jean-Philippe EYMARD OSTEOPATHE D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"23-12-2021\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26775264_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760001750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RENAULT Mathilde\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PETIT JULIE\", \"beneficiary_dob\": \"27-01-1984\", \"invoice_date\": \"18-11-2022\", \"security_number\": null, \"invoice_issuer\": \"RENAULT Mathilde\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26775374_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760001750\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RENAULT Mathilde\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIVERA FRANCOIS\", \"beneficiary_dob\": \"28-09-1979\", \"invoice_date\": \"24-11-2022\", \"security_number\": null, \"invoice_issuer\": \"RENAULT Mathilde\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9ophathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26908261_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750019762\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Simon Joly\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PERRIN Kamila\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"2 21 04 77 237 187 35\", \"invoice_issuer\": \"Simon Joly\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"30-11-2022\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26921045_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": null, \"rpps_number\": \"810102117917\", \"finess_number\": null, \"doctor_name\": \"Hippolyte GROULEZ\", \"total_billed\": 170.0, \"bill_paid\": true, \"amount_paid\": 170.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"Komolka Isabelle\", \"insured_dob\": null, \"beneficiary_name\": \"Komolka Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"30-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Hippolyte GROULEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Orth\\u00e8se Plantaire\", \"quantity\": 1, \"date_of_service\": \"30-11-2022\", \"mandatory_coverage\": null, \"amount\": 170.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26934135_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"170002281\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yoatis MOLLIERE\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. SEUNAT Denis\", \"beneficiary_dob\": null, \"invoice_date\": \"29-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Yoatis MOLLIERE\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26934661_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060013463\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tristan MOUSSY\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BENHAMED Nicolas\", \"beneficiary_dob\": \"06-11-1980\", \"invoice_date\": \"05-11-2022\", \"security_number\": null, \"invoice_issuer\": \"Tristan MOUSSY Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-11-2022\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26935993_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"358000735\", \"rpps_number\": \"10006091754\", \"finess_number\": null, \"doctor_name\": \"DOUCET ANNIE\", \"total_billed\": 110, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEBOURHIS ANTOINETTE\", \"beneficiary_dob\": null, \"invoice_date\": \"08-12-2021\", \"security_number\": \"259083536043094\", \"invoice_issuer\": \"DOUCET ANNIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LPP 2122121 X2\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 110}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26941951_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"770005346\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr GHERRAM Omar\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"OUASSILA KERKAERT\", \"beneficiary_dob\": \"30-08-1975\", \"invoice_date\": \"18-06-2022\", \"security_number\": \"275089935205144\", \"invoice_issuer\": \"Mr GHERRAM Omar\", \"currency\": \"EUR\", \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": \"18-06-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26946589_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"458000775\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire Labb\\u00e9-El\", \"total_billed\": 150, \"bill_paid\": true, \"amount_paid\": 150, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FLEURY Matthieu\", \"beneficiary_dob\": null, \"invoice_date\": \"26-10-2022\", \"security_number\": \"186087645113433\", \"invoice_issuer\": \"Mme Claire Labb\\u00e9-El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"paire d'orth\\u00e8ses plantaires\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 150}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26951476_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780001790\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RENAULT Mathilde\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIVERA JADE\", \"beneficiary_dob\": \"28-09-2006\", \"invoice_date\": \"08-12-2022\", \"security_number\": null, \"invoice_issuer\": \"RENAULT Mathilde\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26954651_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"2A0000097\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Xavier LUCCIARDI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DI MUCCIO St\\u00e9phane\", \"beneficiary_dob\": null, \"invoice_date\": \"08-12-2022\", \"security_number\": \"176102A004021\", \"invoice_issuer\": \"Xavier LUCCIARDI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 SEANCE D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"08-12-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26957916_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"358001162\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mme Pr\\u00e9vost Salom\\u00e9\", \"total_billed\": 125.0, \"bill_paid\": true, \"amount_paid\": 125.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Kleinbauer Luc\", \"insured_dob\": null, \"beneficiary_name\": \"Kleinbauer Luc\", \"beneficiary_dob\": null, \"invoice_date\": \"12-10-2022\", \"security_number\": null, \"invoice_issuer\": \"Mme Pr\\u00e9vost Salom\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Podologie au cabinet\", \"quantity\": 1, \"date_of_service\": \"12-10-2022\", \"mandatory_coverage\": null, \"amount\": 125.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26964511_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"290000520\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Philippe DEFEBVRE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"L'HELGUEN St\\u00e9phanie\", \"beneficiary_dob\": null, \"invoice_date\": \"30-09-2022\", \"security_number\": \"285112901902265\", \"invoice_issuer\": \"Jean-Philippe DEFEBVRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires de la consultation\", \"quantity\": 1, \"date_of_service\": \"30-09-2022\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26964863_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300002565\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VALLAT S\\u00e9bastien\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rogies Aur\\u00e9lien\", \"beneficiary_dob\": null, \"invoice_date\": \"13-12-2022\", \"security_number\": \"179043155525730\", \"invoice_issuer\": \"VALLAT S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-02-2022\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26972680_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"030000780\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"JEREMY DESCHET\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUINET ROBERT\", \"beneficiary_dob\": null, \"invoice_date\": \"13-12-2022\", \"security_number\": \"146027158800291\", \"invoice_issuer\": \"JEREMY DESCHET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraires\", \"quantity\": null, \"date_of_service\": \"13-12-2022\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26977630_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780002275\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9lia FELIX\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Antoine Garcia\", \"beneficiary_dob\": null, \"invoice_date\": \"17-12-2022\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9lia FELIX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-12-2022\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26991829_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Radiologie\", \"rpps_number\": \"10100083970\", \"adeli_number\": \"441098969\", \"doctor_name\": \"JENNIN FELICIE\", \"total_billed\": 99.93, \"bill_paid\": true, \"amount_paid\": 99.93, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"invoice_date\": \"16-12-2022\", \"security_number\": \"261024410917768\", \"invoice_issuer\": \"GROUPE IRIS GRIM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Montant de l'examan\", \"date_of_service\": \"16-12-2022\", \"amount\": 69.93}, {\"description\": \"OPTAM (Option de Pratique Tarifaire Ma\\u00eetris\\u00e9e)\", \"amount\": 30}], \"beneficiary_name\": \"COUEDRO MARTINE\", \"beneficiary_dob\": \"08-02-1961\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-26999810_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"2B0001259\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benjamin AGOSTINI\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AGOSTINI Jean-marie\", \"beneficiary_dob\": \"15-01-1953\", \"invoice_date\": \"15-12-2022\", \"security_number\": \"153019935145408\", \"invoice_issuer\": \"Benjamin AGOSTINI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-12-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27008469_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"848000485\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manon STEPHAN E.I\", \"total_billed\": 36.0, \"bill_paid\": true, \"amount_paid\": 36.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"DE HARO Louis\", \"insured_dob\": null, \"beneficiary_name\": \"DE HARO Louis\", \"beneficiary_dob\": null, \"invoice_date\": \"12-12-2022\", \"security_number\": null, \"invoice_issuer\": \"Manon STEPHAN E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soin au cabinet\", \"quantity\": 1, \"date_of_service\": \"12-12-2022\", \"mandatory_coverage\": null, \"amount\": 36.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27011182_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"180001331\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LUCAS Oph\\u00e9lie\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RAULT Guy\", \"beneficiary_dob\": null, \"invoice_date\": \"25-10-2022\", \"security_number\": null, \"invoice_issuer\": \"LUCAS Oph\\u00e9lie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-10-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27011183_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"180001331\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LUCAS Oph\\u00e9lie\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RAULT Guy\", \"beneficiary_dob\": null, \"invoice_date\": \"28-12-2022\", \"security_number\": null, \"invoice_issuer\": \"LUCAS Oph\\u00e9lie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Attestation s\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"28-12-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27120068_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130011984\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julie Touret-Marin\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": \"11-03-1971\", \"beneficiary_name\": \"M. COLAS Micha\\u00ebl\", \"beneficiary_dob\": \"11-05-1971\", \"invoice_date\": \"11-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Julie Touret-Marin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie \\u00e0 Cabasse\", \"quantity\": 1, \"date_of_service\": \"11-01-2023\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27120071_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130011984\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Julie TOURET-MARIN\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"M COLAS Michael\", \"insured_dob\": \"11-05-1971\", \"beneficiary_name\": \"M COLAS Michael\", \"beneficiary_dob\": \"11-05-1971\", \"invoice_date\": \"04-11-2022\", \"security_number\": \"171052123118029\", \"invoice_issuer\": \"Julie TOURET-MARIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie \\u00e0 Cabri\\u00e8s\", \"quantity\": 1, \"date_of_service\": \"04-11-2022\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27132761_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"550000224\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SICOT Micka\\u00ebl\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VAILLANT Samuel\", \"beneficiary_dob\": \"23-11-1984\", \"invoice_date\": \"31-01-2023\", \"security_number\": \"1841155545089\", \"invoice_issuer\": \"SICOT Micka\\u00ebl\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"31-01-2023\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27232722_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130004427\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Mr MOUGEL Anthony\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"Allizond St\\u00e9phanie\", \"insured_dob\": null, \"beneficiary_name\": \"Allizond St\\u00e9phanie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-03-2023\", \"security_number\": null, \"invoice_issuer\": \"Mr MOUGEL Anthony\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Normal\", \"quantity\": 1, \"date_of_service\": \"07-03-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-27405153_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910003714\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jerome LOCHERT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Elodie Pickaerts\", \"beneficiary_dob\": \"27-02-1986\", \"invoice_date\": \"27-04-2023\", \"security_number\": null, \"invoice_issuer\": \"Jerome LOCHERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"27-04-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-GED E-J-2581853_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"250002698\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Domitille BONO\", \"total_billed\": 63, \"bill_paid\": true, \"amount_paid\": 63, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FILISETTI Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2021\", \"security_number\": null, \"invoice_issuer\": \"Domitille BONO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-04-2021\", \"mandatory_coverage\": null, \"amount\": 63}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-GED E-J-2591884_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"680001542\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie Deroubaix\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"M. Olivier FALLECKER\", \"insured_dob\": null, \"beneficiary_name\": \"M. Olivier FALLECKER\", \"beneficiary_dob\": null, \"invoice_date\": \"13-01-2021\", \"security_number\": \"175126822425869\", \"invoice_issuer\": \"Marie Deroubaix\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-01-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-GED E-J-2591885_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"680001542\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie Deroubaix\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"M. Olivier FALLECKER\", \"insured_dob\": null, \"beneficiary_name\": \"M. Olivier FALLECKER\", \"beneficiary_dob\": null, \"invoice_date\": \"04-06-2021\", \"security_number\": \"175126822425869\", \"invoice_issuer\": \"Marie Deroubaix\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"04-06-2021\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-GED E-J-2602508_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330000803\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ERIC DIEZ\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FOLGADO FREDERIC\", \"beneficiary_dob\": null, \"invoice_date\": \"07-09-2021\", \"security_number\": \"174119521903339\", \"invoice_issuer\": \"ERIC DIEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-GED E-J1-1065966_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"2B0001028\", \"rpps_number\": \"1000584877\", \"finess_number\": null, \"doctor_name\": \"GALLIOU Yves\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUYON Sylvain\", \"beneficiary_dob\": \"25-04-1979\", \"invoice_date\": \"25-10-2022\", \"security_number\": null, \"invoice_issuer\": \"GALLIOU Yves\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE(S) OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"25-10-2022\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture ost\u00e9opathe-GED N-R-2323015_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570002865\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TREDAN Valentin\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RODRIGUEZ SERGIO\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2022\", \"security_number\": \"267800159000\", \"invoice_issuer\": \"TREDAN Valentin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-26471195_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"787005727\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sylvain BRANDIBAS\", \"total_billed\": 70, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DE WAEGENAERE Ludraine\", \"beneficiary_dob\": null, \"invoice_date\": \"19-07-2022\", \"security_number\": \"277105960606520\", \"invoice_issuer\": \"Sylvain BRANDIBAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-26959315_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920015831\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ROQUES Mathieu\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MOREAU Michael\", \"beneficiary_dob\": null, \"invoice_date\": \"18-11-2022\", \"security_number\": null, \"invoice_issuer\": \"M. ROQUES Mathieu\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-11-2022\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Facture-quittance toutes sp\u00e9cialit\u00e9s-26995130_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310014022\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maxime Nicola\\u00efeff El\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"FABRE C\\u00e9dric\", \"beneficiary_dob\": \"10-06-1974\", \"invoice_date\": \"19-12-2022\", \"security_number\": null, \"invoice_issuer\": \"Maxime Nicola\\u00efeff El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Premi\\u00e8re Consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469354facture_etiopathe_Herver_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"\\u00c9tiopathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fabien PERRAUD\", \"total_billed\": 45.0, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Fraslin Herv\\u00e9\", \"insured_dob\": null, \"beneficiary_name\": \"Fraslin Herv\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2024\", \"security_number\": \"164135124013\", \"invoice_issuer\": \"Fabien PERRAUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Intervention(s) \\u00e9tiopathique(s)\", \"quantity\": 1, \"date_of_service\": \"16-08-2024\", \"mandatory_coverage\": null, \"amount\": 45.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469365facture_osteo08.24_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330018458\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Roinard Camille\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Broguier Denis\", \"beneficiary_dob\": null, \"invoice_date\": \"14-08-2024\", \"security_number\": null, \"invoice_issuer\": \"DUPUY S\\u00e9bastien\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins effectu\\u00e9s\", \"quantity\": null, \"date_of_service\": \"14-08-2024\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469367facture_psy_DOMI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"819302076\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RAYNAUD Marina E.I.\", \"total_billed\": 110, \"bill_paid\": true, \"amount_paid\": 110, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAU Dominique\", \"beneficiary_dob\": null, \"invoice_date\": \"23-07-2024\", \"security_number\": null, \"invoice_issuer\": \"Psychologue RAYNAUD Marina E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien psychologique individuel\", \"quantity\": 1, \"date_of_service\": \"17-07-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}, {\"description\": \"Entretien psychologique individuel\", \"quantity\": 1, \"date_of_service\": \"23-07-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469373facture_psy_DOMI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"819302076\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RAYNAUD Marina E.I.\", \"total_billed\": 110, \"bill_paid\": true, \"amount_paid\": 110, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAU Dominique\", \"beneficiary_dob\": null, \"invoice_date\": \"23-07-2024\", \"security_number\": null, \"invoice_issuer\": \"Psychologue RAYNAUD Marina E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien psychologique individuel\", \"quantity\": 1, \"date_of_service\": \"17-07-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}, {\"description\": \"Entretien psychologique individuel\", \"quantity\": 1, \"date_of_service\": \"23-07-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469375osteopathe_renau_anais_le_31-07-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110003126\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9rail Martin\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Renau Anais\", \"beneficiary_dob\": null, \"invoice_date\": \"31-07-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9rail Martin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469376osteopathe_renau_anais_le_31-07-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110003126\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9rail Martin\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Renau Anais\", \"beneficiary_dob\": null, \"invoice_date\": \"31-07-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9rail Martin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469377Facture_DAURISFlorian_20240815CGEI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830000592\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"JOLY Matthieu El\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M DAURIS Florian\", \"beneficiary_dob\": \"25-01-1991\", \"invoice_date\": \"15-08-2024\", \"security_number\": null, \"invoice_issuer\": \"JOLY Matthieu El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Prestation\", \"quantity\": 1, \"date_of_service\": \"15-08-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156469378osteopathe_renau_anais_le_31-07-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"110003126\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9rail Martin\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Renau Anais\", \"beneficiary_dob\": null, \"invoice_date\": \"31-07-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9rail Martin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156677823Facture-CONSTANT-F2024-311_240820_122321_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"669303075\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fanny CONSTANT\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Malirach S\\u00e9bastien\", \"beneficiary_dob\": null, \"invoice_date\": \"19-08-2024\", \"security_number\": null, \"invoice_issuer\": \"Fanny CONSTANT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"th\\u00e9rapie familliale\", \"quantity\": 1, \"date_of_service\": \"19-08-2024\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17241566778247779dccd-e833-44f2-8acf-5d5ab7077245_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920006475\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUHOURCAU Marie\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHERRIER Thomas\", \"beneficiary_dob\": \"12-07-1995\", \"invoice_date\": \"14-08-2024\", \"security_number\": null, \"invoice_issuer\": \"DUHOURCAU Marie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"14-08-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17241566778387779dccd-e833-44f2-8acf-5d5ab7077245_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920006475\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUHOURCAU Marie\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHERRIER Thomas\", \"beneficiary_dob\": \"12-07-1995\", \"invoice_date\": \"14-08-2024\", \"security_number\": null, \"invoice_issuer\": \"DUHOURCAU Marie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"14-08-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156677847Facture_osteophate_13_08_2024_POIGNANT_Thierry_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620003806\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tanguy Fontaine\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POIGNANT Thierry\", \"beneficiary_dob\": null, \"invoice_date\": \"13-08-2024\", \"security_number\": null, \"invoice_issuer\": \"Tanguy Fontaine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156677851ostheo_24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130011836\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CARBONI Ludivine\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"THOINET Marion\", \"beneficiary_dob\": \"04-05-1980\", \"invoice_date\": \"22-02-2024\", \"security_number\": null, \"invoice_issuer\": \"CARBONI Ludivine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156681291facture_osteopathe_20_aout_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640001723\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FRANCKE Philippe\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": 25, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gerald GUEDRA\", \"beneficiary_dob\": null, \"invoice_date\": \"20-08-2024\", \"security_number\": null, \"invoice_issuer\": \"Docteur FRANCKE Philippe\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale\", \"quantity\": 1, \"date_of_service\": \"20-08-2024\", \"mandatory_coverage\": null, \"amount\": 25}, {\"description\": \"soins d'ost\\u00e9opathie non-remboursables\", \"quantity\": 1, \"date_of_service\": \"20-08-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156681296ordonnance_osteopathe_L_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"760005819\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DEMAIS Rachel\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"FLAMENT Laurent\", \"insured_dob\": null, \"beneficiary_name\": \"FLAMENT Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"16-08-2024\", \"security_number\": \"176067645120730\", \"invoice_issuer\": \"THIBAULT FOLL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-08-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156681298001_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"140003617\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas Roisin\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Christian RAULT\", \"beneficiary_dob\": \"14-11-1958\", \"invoice_date\": \"19-08-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie - Alexandre Pag\\u00e8s\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156681300001_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"140003617\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Thomas Roisin\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Christian RAULT\", \"beneficiary_dob\": \"14-11-1958\", \"invoice_date\": \"19-08-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie - Alexandre Pag\\u00e8s\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724156681302Loen_osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780004776\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tristan FILY\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lo\\u00ebn HEYMAN\", \"beneficiary_dob\": \"27-08-2023\", \"invoice_date\": \"20-08-2024\", \"security_number\": null, \"invoice_issuer\": \"Tristan FILY (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"20-08-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151680Facture_Julie_Boissel_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maud GERSON\", \"total_billed\": 90.0, \"bill_paid\": true, \"amount_paid\": 90.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOISSEL Julie\", \"beneficiary_dob\": null, \"invoice_date\": \"22-03-2024\", \"security_number\": null, \"invoice_issuer\": \"SARL MG SOPHROLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PREMIERE SEANCE DE SOPHROLOGIE Anamn\\u00e8se et d\\u00e9couverte des premiers outils\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 90.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151715Facture_Julie_Boissel_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Sophrologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maud GERSON\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 65.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOISSEL Julie\", \"beneficiary_dob\": null, \"invoice_date\": \"28-03-2024\", \"security_number\": null, \"invoice_issuer\": \"SARL MG SOPHROLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE DE SUIVI DE SOPHROLOGIE Mise en place d'outils de relaxation psycho-corporelle\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151775psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"339328395\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SEGUIN Julie\", \"total_billed\": 55.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Celine Charlotte\", \"beneficiary_dob\": null, \"invoice_date\": \"03-04-2024\", \"security_number\": null, \"invoice_issuer\": \"SEGUIN Julie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) de psychologie\", \"quantity\": 1, \"date_of_service\": \"03-04-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151777Facture_N_2429036_de_60__du_290324_Osteo_Marjolaine_BOUAISSIER_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"380001651\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marjolaine Bouaissier\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Laurence Mamillon\", \"insured_dob\": null, \"beneficiary_name\": \"Laurence Mamillon\", \"beneficiary_dob\": null, \"invoice_date\": \"29-03-2024\", \"security_number\": \"264104221808034\", \"invoice_issuer\": \"Ost\\u00e9o heyrieux\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151783Facture_Dietetique_24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Di\\u00e9t\\u00e9ticienne\", \"adeli_number\": \"949500474\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lina MENDES\", \"total_billed\": 45, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Delphine DELILLE\", \"beneficiary_dob\": null, \"invoice_date\": \"02-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Lina MENDES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151795facture_du_30_mars_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"760001438\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr Annick DUBOS\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme GRACIA MEURILLON\", \"beneficiary_dob\": null, \"invoice_date\": \"29-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Dr. DUBOS ANNICK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"soins chiropractice\", \"quantity\": null, \"date_of_service\": \"29-03-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151815IMG_3325_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Soins hospitalier\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 23.0, \"bill_paid\": true, \"amount_paid\": 23.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAROLL ANNE BAURON RICCI\", \"beneficiary_dob\": null, \"invoice_date\": \"04-04-2024\", \"security_number\": null, \"invoice_issuer\": \"GRAND HOPITAL EST FRANCILIEN\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151819Facture-FACTURE_2024-0982-MEKELLECHE-04-02-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759319338\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"AGN\\u00c8S TURPIN EI\", \"total_billed\": 90, \"bill_paid\": true, \"amount_paid\": 90, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MELISSA MEKELLECHE\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2024\", \"security_number\": null, \"invoice_issuer\": \"AGN\\u00c8S TURPIN EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"04-02-2024\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151828Facture-FACTURE_2024-1152-MEKELLECHE-31-03-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759319338\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"AGN\\u00c8S TURPIN EI\", \"total_billed\": 90, \"bill_paid\": true, \"amount_paid\": 90, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MELISSA MEKELLECHE\", \"beneficiary_dob\": null, \"invoice_date\": \"31-03-2024\", \"security_number\": null, \"invoice_issuer\": \"AGN\\u00c8S TURPIN EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"31-03-2024\", \"mandatory_coverage\": null, \"amount\": 90}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151843OPPON_GEORGES_FACTURE_LENTILLES_2023_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"942612987\", \"doctor_name\": \"BOUCHARDON\", \"total_billed\": 310.0, \"bill_paid\": true, \"amount_paid\": 310.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 310.0, \"remaining_payment\": 0, \"insured_name\": \"OPPON GEORGES\", \"insured_dob\": \"16-11-1971\", \"beneficiary_name\": \"OPPON GEORGES\", \"beneficiary_dob\": \"16-11-1971\", \"invoice_date\": \"12-06-2023\", \"security_number\": \"171119306615057\", \"invoice_issuer\": \"Optic Alain\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 155.0}, {\"description\": \"Lentilles\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 155.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151851OPPON_GEROGES_FACTURE_LENTILLES_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"finess_number\": \"942612987\", \"doctor_name\": \"BOUCHARDON\", \"total_billed\": 310.0, \"bill_paid\": true, \"amount_paid\": 310.0, \"complementary_coverage\": null, \"client_part\": 310.0, \"remaining_payment\": 0.0, \"insured_name\": \"M. OPPON Georges\", \"insured_dob\": \"16-11-1971\", \"invoice_date\": \"25-03-2024\", \"security_number\": \"171119306615057\", \"invoice_issuer\": \"Optic Alain\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPH - ACUVUE OASYS MULTIFOCAL HIGH\", \"quantity\": 2, \"amount\": 310.0}], \"beneficiary_name\": \"M. OPPON Georges\", \"beneficiary_dob\": \"16-11-1971\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151864OPPON_GEORGES_ORDO_LENT_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"finess_number\": \"942612987\", \"invoice_issuer\": \"OPTIC ALAIN\", \"invoice_date\": null, \"currency\": \"EUR\", \"items\": [], \"insured_dob\": null, \"beneficiary_dob\": null, \"doctor_name\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226151866livretDeFamille_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587911090424_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"750003477\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Beno\\u00eet DUFR\\u00c9NOY\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LHEZ S\\u00e9verine\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2024\", \"security_number\": null, \"invoice_issuer\": \"POSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"08-04-2024\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587915Facture_kiro_fred_mars_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"940008642\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne VAUDABLE D.C.\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Frederic Perthu\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"15-03-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9l\\u00e8ne VAUDABLE D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s) chiropratique(s)\", \"quantity\": 1, \"date_of_service\": \"15-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587918facture_kiro_vanessa_29_mars_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"940008642\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne VAUDABLE D.C.\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vanessa Perthu\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"29-03-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9l\\u00e8ne VAUDABLE D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s) chiropratique(s)\", \"quantity\": 1, \"date_of_service\": \"29-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587920facture_kiro_manon_2_mars_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"940008642\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne VAUDABLE D.C.\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Manon Perthu\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"02-03-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9l\\u00e8ne VAUDABLE D.C.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation(s) chiropratique(s)\", \"quantity\": 1, \"date_of_service\": \"02-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587921facture_osteo_23.03.2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930003363\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Constant C\\u00e9cile\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Roxane Fretier\", \"beneficiary_dob\": \"14-12-1996\", \"invoice_date\": \"23-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Constant C\\u00e9cile\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"23-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17242265879272024-03-27_Facture_Psyco_-Ma_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759366206\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"TOLLANCE Alexandra\", \"total_billed\": 70, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIALLE Ma\\u00e9va\", \"beneficiary_dob\": \"15-09-2000\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"TOLLANCE Alexandra\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"21-03-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17242265879292024-03-28_Facture_Chiroprateur_-Ch_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"940009947\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Muriel SMILA-GARROT\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIALLE Christelle\", \"beneficiary_dob\": null, \"invoice_date\": \"28-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Muriel SMILA-GARROT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation chiropratique\", \"quantity\": 1, \"date_of_service\": \"28-03-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17242265879312024-02-19_Osteo_facture_-Xa_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940004666\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUMESGE C\\u00e9dric\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIALLE Xavier\", \"beneficiary_dob\": \"25-03-1973\", \"invoice_date\": \"19-02-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Dumesge\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17242265879332024-03-04_Osteo_facture_-Xa_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940004666\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUMESGE C\\u00e9dric\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"VIALLE Xavier\", \"beneficiary_dob\": \"25-03-1973\", \"invoice_date\": \"04-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie Dumesge\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587949AnyScanner_03_25_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chana\\u00ebl, Elyana KAKOU\", \"beneficiary_dob\": \"15-03-2024\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587958Consultation_psychologie_Nathan_MERTEN_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"579302241\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VOSGIEN Val\\u00e9rie\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Merten Nathan\", \"insured_dob\": null, \"beneficiary_name\": \"Merten Nathan\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2024\", \"security_number\": \"173055760605457\", \"invoice_issuer\": \"VOSGIEN Val\\u00e9rie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"08-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587965Consultation_psychologie_Nathan_MERTEN_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"579302241\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VOSGIEN Val\\u00e9rie\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Merten Nathan\", \"insured_dob\": null, \"beneficiary_name\": \"Merten Nathan\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2024\", \"security_number\": \"173055760605457\", \"invoice_issuer\": \"VOSGIEN Val\\u00e9rie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"08-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587971Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"540001237\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZANCHIN David\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Pandolfi Estelle\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"ZANCHIN David\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie au tarif de 55 \\u20ac\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587975seance_ostheo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130002439\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fran\\u00e7ois CABANIS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fr\\u00e9d\\u00e9ric Begouin\", \"beneficiary_dob\": null, \"invoice_date\": \"28-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Fran\\u00e7ois CABANIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"28-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724226587979Seances_ostheo_02_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130002439\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fran\\u00e7ois CABANIS\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Frederic Begouin\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Fran\\u00e7ois CABANIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658126026douilly_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"M\\u00e9decine G\\u00e9n\\u00e9rale\", \"rpps_number\": \"10100941052\", \"doctor_name\": \"Docteur BERREBY Ilan\", \"insured_name\": null, \"invoice_date\": \"09-04-2024\", \"invoice_issuer\": \"Docteur BERREBY Ilan\", \"items\": [], \"beneficiary_name\": \"M. Louis LEDRU\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658126054image16-04-2024-173146-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Osteopathe\", \"adeli_number\": \"950003806\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Morgane LAUDIC\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"Elena Petit\", \"security_number\": null, \"invoice_issuer\": \"LAIDIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"08-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658126062Facture_osteo_DOUCHET_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"070000724\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Virginie CAPARROS GUILLET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Douchet Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Virginie CAPARROS GUILLET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658126093MM_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"449319763\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lucas BINI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marina MATIAS\", \"beneficiary_dob\": \"31-08-1999\", \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"EI Lucas BINI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658126132osteo_22_mars_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830002192\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Olivier AUBERT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DETALLE Sandrine\", \"beneficiary_dob\": null, \"invoice_date\": \"22-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Olivier AUBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658126141JACOB_Francois_2024-111_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658126141JACOB_Francois_2024-111_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"invoice_number\": \"2024-111\", \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670007087\", \"doctor_name\": \"CHARLES Fanny\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"insured_name\": null, \"invoice_date\": \"16-04-2024\", \"invoice_issuer\": \"Mme CHARLES Fanny\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"date_of_service\": \"16-04-2024\", \"amount\": 55}], \"beneficiary_name\": \"JACOB Fran\\u00e7ois\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17246582258432400585-goubier-sandrine-1835799692661e42158b3a5_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"420002057\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Judith Lavaire\", \"total_billed\": 58.0, \"bill_paid\": true, \"amount_paid\": 58.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sandrine GOUBIER\", \"beneficiary_dob\": \"07-04-1987\", \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Judith Lavaire\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 58.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658225847osteo_du_17-04-24_CHIROUX_Theo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"420003097\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laure POYET\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHIROUX Theo\", \"beneficiary_dob\": \"29-08-1999\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Laure POYET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-06-2024\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658225850OSTEO1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330009192\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OUZOULIAS Fran\\u00e7ois E i\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAMOUROUX Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"11-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Mr OUZOULIAS Fran\\u00e7ois E i Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658225853OSTEO_2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330009192\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OUZOULIAS Fran\\u00e7ois E i\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LATOUROUX Eric\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Mr OUZOULIAS Fran\\u00e7ois E i Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172465822585720240416133632216_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440003416\", \"doctor_name\": \"Matthieu SINENBERG\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"beneficiary_name\": \"Marie LANGREE\", \"invoice_date\": \"15-04-2024\", \"invoice_issuer\": \"Matthieu SINENBERG \\u2013 Ost\\u00e9opathe DO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"amount\": 55}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658225863rembourseemnt_OSTEO_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440003416\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Matthieu SINENBERG\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Harouna SY\", \"beneficiary_dob\": null, \"invoice_date\": \"11-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Matthieu SINENBERG - Ost\\u00e9opathe DO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-04-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17246582258660E8D5B57-6455-4AB8-8734-FB3FF68791F1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"920003902\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00e9my FRECAUT\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GUERRERO Leslie\", \"beneficiary_dob\": null, \"invoice_date\": \"01-03-2024\", \"security_number\": null, \"invoice_issuer\": \"J\\u00e9r\\u00e9my FRECAUT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"01-03-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658225868facture_PECOURT_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"210001715\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MEGAN VIARD\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PECOURT Manon\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"MEGAN VIARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298461facture_podologue_alicia_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"10108876599\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MARTIN Charlotte\", \"total_billed\": 32, \"bill_paid\": true, \"amount_paid\": 32, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sinon Alicia\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"MARTIN Charlotte\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins de p\\u00e9dicuries\", \"quantity\": null, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 32}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298465Facture_chiro_17_avril_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"600003248\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MME OUCHART LYDIE\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme CHALVET Laurie\", \"beneficiary_dob\": \"21-09-1987\", \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"MME OUCHART LYDIE Chiropracteur\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Prestation\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298467Facture_Osteopatie_Melanie_HAMON_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940000219\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Philippe Jeand'Heur\", \"total_billed\": 75.0, \"bill_paid\": true, \"amount_paid\": 75.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M\\u00e9lanie HAMON\", \"beneficiary_dob\": \"30-09-1987\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Philippe Jeand'Heur\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298482BENTO22032024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"789330222\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yannick Libourel\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BENTO DA SILVA Gallien\", \"beneficiary_dob\": null, \"invoice_date\": \"22-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Yannick Libourel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"22-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298484BENTO29032024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"789330222\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yannick Libourel\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BENTO DA SILVA Gallien\", \"beneficiary_dob\": null, \"invoice_date\": \"29-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Yannick Libourel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"29-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298486BENTO06042024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"789330222\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yannick Libourel\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BENTO DA SILVA Gallien\", \"beneficiary_dob\": null, \"invoice_date\": \"05-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Yannick Libourel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"05-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298501BENTO12042024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"789330222\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yannick Libourel\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BENTO DA SILVA Gallien\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Yannick Libourel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"12-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658298509240405_Osteopathe_Pauline_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"136018173\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eve ARTOIS\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"PERRAUT Pauline\", \"insured_dob\": null, \"beneficiary_name\": \"PERRAUT Pauline\", \"beneficiary_dob\": null, \"invoice_date\": \"05-04-2024\", \"security_number\": \"286088920603401\", \"invoice_issuer\": \"Eve ARTOIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"05-04-24\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658376419facture_ostheo_avril_24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"490003860\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille MERCERON\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GENDRON Val\\u00e9rie\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Camille MERCERON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Honoraire\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658376424KM_C250i24032713020_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"811719756\", \"rpps_number\": \"10002897162\", \"doctor_name\": \"Dr Gilles LESIEUR\", \"total_billed\": 16.2, \"bill_paid\": true, \"amount_paid\": 16.2, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"invoice_date\": \"27-03-2024\", \"insured_name\": null, \"invoice_issuer\": \"Centre ophtalmologique Iridis\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de r\\u00e9fraction\", \"amount\": 16.2}], \"beneficiary_name\": \"SOUBRI\\u00c9 ISABELLE\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658376426facture_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychiatrie\", \"adeli_number\": \"971007612\", \"rpps_number\": \"10002331352\", \"doctor_name\": \"Dr LAVINIA DOBRESCU\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 59.5, \"mandatory_coverage\": 60.5, \"complementary_coverage\": 0.0, \"client_part\": 59.5, \"remaining_payment\": 0.0, \"insured_name\": \"MAILLOT JUDICAEL\", \"insured_dob\": \"24-11-1978\", \"beneficiary_name\": \"MAILLOT JUDICAEL\", \"beneficiary_dob\": \"24-11-1978\", \"invoice_date\": \"04-04-2024\", \"security_number\": \"1781197416229\", \"invoice_issuer\": \"Dr LAVINIA DOBRESCU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CNP\", \"date_of_service\": \"04-04-2024\", \"mandatory_coverage\": 51.0, \"amount\": 110.5}, {\"description\": \"MPC\", \"date_of_service\": \"04-04-2024\", \"mandatory_coverage\": 4.5, \"amount\": 4.5}, {\"description\": \"MCS\", \"date_of_service\": \"04-04-2024\", \"mandatory_coverage\": 5.0, \"amount\": 5.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658376437factosteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950007716\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Erwin PETERI\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MALOIGNE Amelie\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Erwin PETERI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724658376447Facture_ostheo_Matheo_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780001574\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Axel ZANCHET\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"GIACHINI Math\\u00e9o\", \"insured_dob\": null, \"beneficiary_name\": \"GIACHINI Math\\u00e9o\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": \"187049025080\", \"invoice_issuer\": \"Axel ZANCHET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17246583GUI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"349319962\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LEENHARDT Ana\\u00eble\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nicolas GUIRAUD\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"LEENHARDT Ana\\u00eble\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien clinique\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17256072181781713458102656768366008212813442_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"470001041\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Benjamin MAURIG\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DALOUCHE DAMIEN\", \"beneficiary_dob\": \"15-07-1985\", \"invoice_date\": \"04-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Benjamin MAURIG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": null, \"date_of_service\": \"04-04-24\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172563267515717135215789872986009598841823360_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Daniel FIGUEIRAS\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BERTHON Guillaume\", \"beneficiary_dob\": null, \"invoice_date\": \"18-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Daniel FIGUEIRAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-04-2014\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172563267517320240419_133656_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"849303391\", \"doctor_name\": \"Delphine HAUTBOUT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"insured_name\": null, \"invoice_date\": \"16-04-2024\", \"invoice_issuer\": \"DELPHINE HAUTBOUT PSYCHOLOGUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"date_of_service\": \"16-04-2024\", \"amount\": 60}], \"beneficiary_name\": \"M. Bastien Laurent\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/acte_nass_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/acte_nass_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"insured_dob\": null, \"beneficiary_dob\": null, \"doctor_name\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dfsfsdf_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"669303869\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christine CARUEL\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Katchadourian Sabine\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Christine CARUEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f22ss_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nathalie Poussou\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Yannis MAILLOT\", \"beneficiary_dob\": null, \"invoice_date\": \"30-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Nathalie Poussou\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance d'habilet\\u00e9s sociales\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/sdsqd_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640000758\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GUET Gr\\u00e9gory\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"COMETS JEAN PIERRE\", \"insured_dob\": null, \"beneficiary_name\": \"COMETS JEAN PIERRE\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": \"148016424900155\", \"invoice_issuer\": \"GUET Gr\\u00e9gory\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999412Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"540001237\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZANCHIN David\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PANDOLFI Estelle\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"ZANCHIN David\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie au tarif de 55 \\u20ac\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243969994532400313-simon-heilyn-1693195547661d58d7e884c_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630001378\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Assma SMATI\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Heilyn SIMON\", \"beneficiary_dob\": \"15-10-2009\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Assma SMATI E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999460facture_lentille_garreau1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"91112604-3\", \"finess_number\": \"912601499\", \"doctor_name\": \"HEIDARI NEJAD Habib\", \"total_billed\": 49.0, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Liliane GARREAU\", \"insured_dob\": \"27-03-1942\", \"beneficiary_name\": \"Liliane GARREAU\", \"beneficiary_dob\": \"27-03-1942\", \"invoice_date\": \"28-03-2024\", \"security_number\": \"242037858900432\", \"invoice_issuer\": \"SARL BIEN VOIR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Johnson&Johnson Vision Care-1 Rue Camille Desmoulins-FR-92130 Issy-les-Moulineaux 12x ACUVUE OASYS 12 Lentille Souple, Jetable Port Prolong\\u00e9; Conditionnement: 12 Quantit\\u00e9: 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 49.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999465IMG_20240416_0001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"190000422\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lia TEILLOL\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Mr J\\u00e9rome Goulmy\", \"insured_dob\": null, \"beneficiary_name\": \"Mr J\\u00e9rome Goulmy\", \"beneficiary_dob\": null, \"invoice_date\": \"11-04-2024\", \"security_number\": \"176121903107343\", \"invoice_issuer\": \"Aur\\u00e9lia TEILLOL Ost\\u00e9opathe, Entrepreneur Individuel\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999467IMG_20240416_210050_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 23.3, \"bill_paid\": true, \"amount_paid\": 23.3, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 23.3, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUCHER MAEVA\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"PHARMACIE DU CENTRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AUTOTEST ANTIGENIQUE C\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 3.9}, {\"description\": \"MUCODRILL 600MG CPR EF\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 6.5}, {\"description\": \"NHCO VITOREL BLISTER 3\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 12.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243969994762300080-gielczynski-clara-64156159563d0147d118ac-2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"500002233\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne DEUIL\", \"total_billed\": 55.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 55.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Clara GIELCZYNSKI\", \"beneficiary_dob\": \"04-05-2007\", \"invoice_date\": \"24-01-2023\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9l\\u00e8ne DEUIL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"24-01-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999482OstAo_Christian_16-04_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690002985\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vincent ROBIN\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Christian MARCHAS\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Vincent ROBIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999502CamScanner_29-03-2024_11.38_1_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alessia, Ida BOAGLIO\", \"beneficiary_dob\": \"26-03-2024\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999520facture_osteo_fabien_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"570003749\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Loic Keller\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Kacymaneck Fabien\", \"beneficiary_dob\": null, \"invoice_date\": \"10-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Antoine TORNABENE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243969995232400015-gielczynski-jeanne-131369953265965e69a20c6_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"500002233\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne DEUIL\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jeanne GIELCZYNSKI\", \"beneficiary_dob\": \"28-04-1979\", \"invoice_date\": \"04-01-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9l\\u00e8ne DEUIL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"04-01-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243969995252400021-gielczynski-louise-17494429056596ad03d4ae2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"500002233\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"H\\u00e9l\\u00e8ne DEUIL\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Louise GIELCZYNSKI\", \"beneficiary_dob\": \"19-04-2011\", \"invoice_date\": \"04-01-2024\", \"security_number\": null, \"invoice_issuer\": \"H\\u00e9l\\u00e8ne DEUIL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"04-01-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999539facture_osteopathe__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950007468\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MARIANNIE JESSICA\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cantal Samantha\", \"beneficiary_dob\": null, \"invoice_date\": \"06-04-2024\", \"security_number\": null, \"invoice_issuer\": \"MARIANNIE JESSICA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"06-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999541Image_2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DOCTEUR COULANGE Isabelle\", \"total_billed\": 48.0, \"bill_paid\": true, \"amount_paid\": 48.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"MICHEL BERNARD\", \"insured_dob\": \"14-02-1956\", \"beneficiary_name\": \"MICHEL BERNARD\", \"beneficiary_dob\": \"14-02-1956\", \"invoice_date\": \"15-04-2024\", \"security_number\": \"1560402828323\", \"invoice_issuer\": \"DOCTEUR COULANGE Isabelle\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CS\", \"quantity\": null, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 48.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724396999545image_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathie\", \"adeli_number\": \"860002005\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"BECOURT\", \"insured_dob\": null, \"beneficiary_name\": \"BECOURT\", \"beneficiary_dob\": null, \"invoice_date\": \"03-04-24\", \"security_number\": \"293092432208235\", \"invoice_issuer\": \"Centre d'Ost\\u00e9opathie Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"UNE CONSULTATION D'OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"03-04-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243969995522300584-guerrache-jessica-57560893664d4b237dd783_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670006840\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Oc\\u00e9ane ZAEPFEL\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jessica GUERRACHE\", \"beneficiary_dob\": \"08-03-1984\", \"invoice_date\": \"10-08-2023\", \"security_number\": null, \"invoice_issuer\": \"Oc\\u00e9ane ZAEPFEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-08-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243969995542300676-guerrache-jessica-185907478364f5a55cbbbda_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670006840\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Oc\\u00e9ane ZAEPFEL\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jessica GUERRACHE\", \"beneficiary_dob\": \"08-03-1984\", \"invoice_date\": \"04-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Oc\\u00e9ane ZAEPFEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"04-09-2023\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243971209982400066-guerrache-jessica-83960773165b0ce7dbef5b_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670006840\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Oc\\u00e9ane ZAEPFEL\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jessica GUERRACHE\", \"beneficiary_dob\": \"08-03-1984\", \"invoice_date\": \"24-01-2024\", \"security_number\": null, \"invoice_issuer\": \"Oc\\u00e9ane ZAEPFEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"24-01-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243971210002400264-guerrache-jessica-138101166661a587fa4556_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670006840\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Oc\\u00e9ane ZAEPFEL\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": \"08-03-1984\", \"beneficiary_name\": \"Jessica GUERRACHE\", \"beneficiary_dob\": \"08-03-1984\", \"invoice_date\": \"13-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Oc\\u00e9ane ZAEPFEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"13-04-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243971210022_factures_naturopathie_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243971210022_factures_naturopathie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Naturopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eva Heinsdorf Teissier EI\", \"total_billed\": 90.0, \"bill_paid\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Samia Hamdaoui\", \"beneficiary_dob\": null, \"invoice_date\": \"18-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Valetudo Integra\", \"currency\": \"EUR\", \"items\": [{\"description\": \"massage\", \"quantity\": 1, \"amount\": 90.0}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Naturopathie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eva Heinsdorf Teissier EI\", \"total_billed\": 90.0, \"bill_paid\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Samia Hamdaoui\", \"beneficiary_dob\": null, \"invoice_date\": \"05-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Valetudo Integra\", \"currency\": \"EUR\", \"items\": [{\"description\": \"massage\", \"quantity\": 1, \"amount\": 90.0}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121007IMG_2973_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"750026056\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Charlotte PELOUIN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE DRET Stephane\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Charlotte PELOUIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) de chiropraxie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121013seance_osteopathe_16-04-2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"330013378\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sara TACLET\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FOUCAUD FABIEN\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Sara TACLET El Ost\\u00e9opathe D.O\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION OSTEOPATHIE\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121019FACTURE-24-04-928-ZIEMINYKAS_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"549313823\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VIRGINIE FUSSIEN SAINT GIRONS\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Tomas ZIEMINYKAS\", \"beneficiary_dob\": null, \"invoice_date\": \"14-04-2024\", \"security_number\": null, \"invoice_issuer\": \"VIRGINIE FUSSIEN SAINT GIRONS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychoth\\u00e9rapie\", \"quantity\": 1.0, \"date_of_service\": \"14-04-2024\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121036PSY_AVRIL_RIPOLL_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"329302392\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Le\\u00efla BOURREL\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MR RIPOLL\", \"beneficiary_dob\": null, \"invoice_date\": \"09-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Le\\u00efla BOURREL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"08-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121048facture_kine_15-04-24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"207402140\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"STEPHANE ABIB\", \"total_billed\": 88.4, \"bill_paid\": true, \"amount_paid\": 35.35, \"mandatory_coverage\": 53.05, \"complementary_coverage\": 0.0, \"client_part\": 35.35, \"remaining_payment\": 0.0, \"insured_name\": \"SQUAGLIA Stephanie\", \"insured_dob\": \"02-05-1975\", \"beneficiary_name\": \"SQUAGLIA Stephanie\", \"beneficiary_dob\": \"02-05-1975\", \"invoice_date\": \"15-04-2024\", \"security_number\": \"275052003301694\", \"invoice_issuer\": \"Mr STEPHANE ABIB\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMK 8\", \"quantity\": 5, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 17.68}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121050Adobe_Scan_17_avr._2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"852006329\", \"doctor_name\": null, \"total_billed\": 45.96, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHOUIKHA AURORE\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"PHARMACIE DU MOULIN ROUGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"DOLIPRANETABS 1 000 MG CPR 8\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 3.96}, {\"description\": \"DYNABIANE START GEL B/30 REF 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 14.12}, {\"description\": \"DYNABIANE FOCUS CPR B/15 REF 0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 16.02}, {\"description\": \"MUCOMYST 200MG SACHET 18\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 3.55}, {\"description\": \"RHINADVIL CPR 20\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.36}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121053FACTUREROY_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"929330462\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lie Broc\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Anthony ROY\", \"beneficiary_dob\": \"03-07-1994\", \"invoice_date\": \"26-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Aur\\u00e9lie Broc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121055F.__Louis_Guillaume_G_030424_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121055F.__Louis_Guillaume_G_030424_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"invoice_number\": \"240403G\", \"profession\": \"Psychologue\", \"adeli_number\": \"9D9302889\", \"doctor_name\": \"CATHERINE DELAUNE PARASSOURAMIN\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"insured_name\": null, \"invoice_date\": \"03-04-2024\", \"invoice_issuer\": \"CATHERINE DELAUNE PARASSOURAMIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation 1Heure\", \"quantity\": 1, \"date_of_service\": \"03-04-2024\", \"amount\": 75}], \"beneficiary_name\": \"GONNEAU Louis Guillaume\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121061Numerisation_20240417_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"729301077\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jeanne MARMARA\", \"total_billed\": 200, \"bill_paid\": true, \"amount_paid\": 200, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Louis CORNEC\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Jeanne MARMARA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"02-02-2024\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"08-02-2024\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-02-2024\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"05-03-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121098Screenshot_20240417_102951_Samsung_Notes_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FLORIANE LECOINTE FAIDY\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"B\\u00e9n\\u00e9dicte MERIGEAU\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2024\", \"security_number\": null, \"invoice_issuer\": \"FLORIANE LECOINTE FAIDY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance Hypnose\", \"quantity\": 1.0, \"date_of_service\": \"13-04-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121103Osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"930002779\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"S\\u00e9verine MARQUES\", \"total_billed\": 65, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Hevy Alexandre\", \"insured_dob\": null, \"beneficiary_name\": \"Hervy Alexandre\", \"beneficiary_dob\": null, \"invoice_date\": \"11-04-2024\", \"security_number\": \"18908370507233\", \"invoice_issuer\": \"OSTEOPATHES D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"11-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397121107image0_4_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"759369929\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DANA\\u00cb HOLLER\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Albane De MONTAIGNAC\", \"beneficiary_dob\": \"07-02-1995\", \"invoice_date\": \"04-04-2024\", \"security_number\": null, \"invoice_issuer\": \"DANA\\u00cb HOLLER PSYCHOLOGUE PSYCHOTHERAPEUTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-04-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125625img229_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Romain BARRET\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Filali H\\u00e9l\\u00e8ne\", \"insured_dob\": null, \"beneficiary_name\": \"Filali H\\u00e9l\\u00e8ne\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": \"242066938221220\", \"invoice_issuer\": \"Romain BARRET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125629Facture_psychologue__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"789311222\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-Claire BERTHOMIER\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUBOIS Agn\\u00e8s\", \"beneficiary_dob\": null, \"invoice_date\": \"10-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Marie-Claire BERTHOMIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en entretien clinique\", \"quantity\": 1, \"date_of_service\": \"10-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125633ostheopate4_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060000684\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"YAHE JEAN-BAPTISTE\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"MOLINARI VERONIQUE\", \"insured_dob\": null, \"beneficiary_name\": \"MOLINARI VERONIQUE\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": \"2550606088232\", \"invoice_issuer\": \"YAHE JEAN-BAPTISTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172439712563420240417_105010_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"M\\u00e9decine du sport\", \"adeli_number\": \"591082060\", \"rpps_number\": \"10101388550\", \"doctor_name\": \"Dr charles BUISSET\", \"beneficiary_name\": \"LANSELLE Ludovic\", \"invoice_date\": \"17-04-2024\", \"invoice_issuer\": \"Dr charles BUISSET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation m\\u00e9dicale\", \"amount\": 26.5}, {\"description\": \"soins de m\\u00e9soth\\u00e9rapie non remboursable par la CPAM\", \"amount\": 30.0}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125640FACTURE_HOSTEO__1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"640007894\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pierre CHAPON\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jeannine Lepicier\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2024\", \"security_number\": null, \"invoice_issuer\": \"E.I. Pierre CHAPON - Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125647facture_osteoathie_Maxime_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590013975\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire DUTRIEUX\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maxime GUHL\", \"beneficiary_dob\": null, \"invoice_date\": \"25-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Claire DUTRIEUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"25-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125653noname_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"finess_number\": \"461800922\", \"rpps_number\": \"10100984623\", \"doctor_name\": \"PIERNE Kevin\", \"total_billed\": 42.0, \"bill_paid\": true, \"amount_paid\": 42.0, \"insured_name\": \"SANS Charlene\", \"invoice_date\": \"08-04-2024\", \"security_number\": \"297079152122929\", \"invoice_issuer\": \"Cabinet de Castelnau-Montratier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY8\", \"amount\": 20.8}, {\"description\": \"Hors Nomenclature\", \"amount\": 21.2}], \"beneficiary_name\": \"SANS Charlene\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125658received_451290077337699_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jayden, Louis, Laurent COIRIER\", \"beneficiary_dob\": \"15-01-2024\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125670Facture_lentilles_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"922614250\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 46.9, \"bill_paid\": true, \"amount_paid\": 46.9, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Camille Trapon\", \"beneficiary_dob\": null, \"invoice_date\": \"06-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Krys\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentilles de contact Dailies AquaComfort Plus x Krys Boite de 90 Alcon\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 46.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125672fact_osteo_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130023153\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HAKEM Arthur\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELFINO Christiane\", \"beneficiary_dob\": null, \"invoice_date\": \"22-02-2024\", \"security_number\": null, \"invoice_issuer\": \"HAKEM Arthur Ost\\u00e9opathe D.O.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"22-02-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125675Facture_ostheopathe_17.04.2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"510003254\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SEDRATI Kenza\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAMBERT Karine\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"CABINET D' OSTEOPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Ost\\u00e9opathie - Consultation de LAMBERT Karine le 17/04/2024\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125678ordo_05.03.2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": \"10002232022\", \"finess_number\": null, \"doctor_name\": \"Christophe SOYEZ\", \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BERNARD Catherine\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2024\", \"security_number\": null, \"invoice_issuer\": \"CABINET MEDICAL \\u00ab LES NOISETIERS \\u00bb\", \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397125682facture_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 33.0, \"bill_paid\": true, \"amount_paid\": 33.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BERNARD CATHERINE\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"PHARMACIE V2\", \"currency\": \"EUR\", \"items\": [{\"description\": \"IGNATIA AMARA 15CH DO GL BOI\", \"quantity\": 6, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}, {\"description\": \"PHOSPHORUS 30CH DO GL BOI\", \"quantity\": 6, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172439712568417_04_2024_Note_dhonoraire_BELDA_Rafaele_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"499312338\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emilie Legroux\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BELDA Rafaele\", \"beneficiary_dob\": \"11-04-2004\", \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Emilie Legroux - EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366199Ostheopathe_30-03-24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"780010435\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RIVILLON Maxime\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BLONDIN OLLIVIER\", \"beneficiary_dob\": null, \"invoice_date\": \"30-03-2024\", \"security_number\": null, \"invoice_issuer\": \"RIVILLON Maxime EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation de soins ost\\u00e9opathiques\", \"quantity\": 1, \"date_of_service\": \"30-03-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243973662012024.04.16_-_Osteo_Elrik_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"260000781\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pierre \\u00c9douard PREVOST\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BARBATO Elrik\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Pierre \\u00c9douard PREVOST\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366206acte_de_naissance_Ilyan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ilyan, Kenzi, Daou BOUKRIF RIMBERT\", \"beneficiary_dob\": \"25-03-2024\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366221osteopathe1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"590001517\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Aur\\u00e9lie Esposto\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEBAS Andr\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Aur\\u00e9lie Esposto\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366225recu-fac-2024-03-000329_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"020001160\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SILVA Micka\\u00ebl El\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HUERTAS Virginie\", \"beneficiary_dob\": \"29-02-1976\", \"invoice_date\": \"29-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Ost\\u00e9opathie de Braine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Premi\\u00e8re consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"29-03-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366226MM_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"449319763\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lucas BINI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marina MATIAS\", \"beneficiary_dob\": \"31-08-1999\", \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"EI Lucas BINI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366227Scan_Attestation_soin_MCT_15_av_2024_Mme_Nadine_ROMERO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Acupuncture\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Cl\\u00e9mentine PARRA\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Romero Nadine\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Cl\\u00e9mentine PARRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de soin et / ou d'acupuncture traditionnelle\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366229Facture_RIVALLANDChrystelle_20240413ICQO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"770002657\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BILLOT Gwenaelle\", \"total_billed\": 70.0, \"bill_paid\": 70.0, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIVALLAND Chrystelle\", \"beneficiary_dob\": \"20-02-1978\", \"invoice_date\": \"13-04-2024\", \"security_number\": null, \"invoice_issuer\": \"BILLOT Gwenaelle , EI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Premi\\u00e8re visite\", \"quantity\": 1, \"date_of_service\": \"13-04-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172439736623117133541856907286967070620590202_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"837004605\", \"rpps_number\": \"10101690252\", \"doctor_name\": \"SEILER Maxime\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"beneficiary_name\": \"Mme ARMAND Jeanine\", \"invoice_date\": \"11-04-2024\", \"invoice_issuer\": \"ESPACE SANTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'Ost\\u00e9opathie\", \"amount\": 70}], \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366237IMG_20240417_133143_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"690001896\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Carole LAYEC\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LACOM Jean Marc\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"E.I. Mme Carole LAYEC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation(s) d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"16-04-2024\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17243973662518A42024B-6214-4DDE-A3C0-3CA27FC89720_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"L\\u00e9o, Elie, Jean-Pierre DIELNA ELISABETH\", \"beneficiary_dob\": \"13-04-2024\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366263FACTURES_OSTEOPATHE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"290000280\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Faubert B\\u00e9atrice\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"SARL LARUPE - BODENES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366265Facture_consultation_du_090424_psychologue_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366265Facture_consultation_du_090424_psychologue_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"invoice_number\": \"375\", \"profession\": \"Psychologue\", \"adeli_number\": \"419305057\", \"doctor_name\": \"Mathieu Le Samedy\", \"total_billed\": 60.0, \"bill_paid\": false, \"amount_paid\": 0.0, \"remaining_payment\": 60.0, \"insured_name\": null, \"invoice_date\": \"09-04-2024\", \"invoice_issuer\": \"Mathieu Le Samedy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation le 09/04/2024\", \"quantity\": 1, \"date_of_service\": \"09-04-2024\", \"amount\": 60.0}], \"beneficiary_name\": \"Ducloux Pauline\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366266osteo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"170000533\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurence Billerot\", \"total_billed\": 60.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VIDAL Olivier\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Laurence Billerot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366268IMG_20240417_143900_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10002314812\", \"finess_number\": null, \"doctor_name\": \"GEVART Fr\\u00e9d\\u00e9ric\", \"total_billed\": 57.23, \"bill_paid\": true, \"amount_paid\": 57.23, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ILLAND Christophe\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"POINT VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BJQP002 BLQP010\", \"quantity\": null, \"date_of_service\": \"02-09-2022\", \"mandatory_coverage\": null, \"amount\": 57.23}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724397366294irm_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Radiologie\", \"adeli_number\": \"171701535\", \"rpps_number\": \"10002098167\", \"finess_number\": null, \"doctor_name\": \"LEGEAIS MARC\", \"total_billed\": 252.72, \"bill_paid\": true, \"amount_paid\": 80.75, \"mandatory_coverage\": 171.97, \"complementary_coverage\": null, \"client_part\": 80.75, \"remaining_payment\": 0, \"insured_name\": \"VIDAL OLIVIER\", \"insured_dob\": null, \"beneficiary_name\": \"VIDAL OLIVIER\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2024\", \"security_number\": \"168109306207196\", \"invoice_issuer\": \"Docteur LEGEAIS MARC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"IRM GENOU\", \"quantity\": null, \"date_of_service\": \"08-04-2024\", \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050337CamScanner_24-10-2023_16.211_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"382607836\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Yann THOMAS\", \"total_billed\": 596.0, \"bill_paid\": true, \"amount_paid\": 596.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"COUDERC CEDRIC\", \"insured_dob\": \"09-12-1976\", \"beneficiary_name\": \"COUDERC CEDRIC\", \"beneficiary_dob\": \"09-12-1976\", \"invoice_date\": \"11-10-2023\", \"security_number\": \"176123403204524\", \"invoice_issuer\": \"YT OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SAFILO - Adresse du fabriquant non renseign\\u00e9e SAFILO MARC JACOBS MARC 417 Coloris BLACK Taille:56 Nez:12 Caract\\u00e9ristiques : Coloris BLACK Taille:56 Nez:12 Code:MARC417\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 196.0}, {\"description\": \"SHAMIR - 22, all. du Clos des Charmes 77090 Coll\\u00e9gien Auto Plus 18 1.5 Gla+ UV - Autograph Plus 18 1.5 Glacie+ UV, Verre progressif \\u00e0 surface digitale avanc\\u00e9e, avec traitement antireflet haute r\\u00e9sistance, facile \\u00e0 nettoyer et anti UV Caract\\u00e9ristiques : Indice:1.50 Diam\\u00e8tre:70/75 Classe:B Mati\\u00e8re : Organique Ecart VL=32-Ecart VP=0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 200.0}, {\"description\": \"SHAMIR - 22, all. du Clos des Charmes 77090 Coll\\u00e9gien Auto Plus 18 1.5 Gla+ UV - Autograph Plus 18 1.5 Glacie+ UV, Verre progressif \\u00e0 surface digitale avanc\\u00e9e, avec traitement antireflet haute r\\u00e9sistance, facile \\u00e0 nettoyer et anti UV Caract\\u00e9ristiques : Indice:1.50 Diam\\u00e8tre:70/75 Classe:B Mati\\u00e8re : Organique Ecart VL=32.5-Ecart VP=0\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 200.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050339FACTURE_132120424_MME_RONDIN_MAHEVA_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"9D9302731\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Frances COATES RUET El\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": 80.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maheva RONDIN\", \"beneficiary_dob\": \"15-12-1984\", \"invoice_date\": \"12-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Mme Frances COATES RUET El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Psychoth\\u00e9rapie individuelle adulte de 90\\\" du 12/04/2024 Th\\u00e9rapie EMDR\", \"quantity\": null, \"date_of_service\": \"12-04-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050370Copie_Integrale_dacte_de_Naissance_Ya_Fatou_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fatou, Ely DIALLO\", \"beneficiary_dob\": \"14-04-2024\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050372FA_24-1136_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VERGNET Val\\u00e9rie\", \"total_billed\": 25, \"bill_paid\": true, \"amount_paid\": 25, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Varilles Christian\", \"beneficiary_dob\": null, \"invoice_date\": \"16-04-2024\", \"security_number\": null, \"invoice_issuer\": \"VERGNET Val\\u00e9rie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"R\\u00e9flexologie soins de support\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050374facture_podologue_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Podologue\", \"adeli_number\": \"838001022\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emmanuelle LANNOO\", \"total_billed\": 36, \"bill_paid\": true, \"amount_paid\": 36, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Minana Martin\", \"beneficiary_dob\": null, \"invoice_date\": \"11-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Emmanuelle LANNOO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins de podologie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 36}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050377psy_rachel_4_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"899302590\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marianne MICHAULT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rachel GABILLON\", \"beneficiary_dob\": \"27-04-1979\", \"invoice_date\": \"09-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Marianne MICHAULT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une consultation de psychologie\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050391Numerisation_20240417_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"630001311\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Willy PLONG\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"HERVIER Jacques\", \"insured_dob\": null, \"beneficiary_name\": \"HERVIER Jacques\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": \"153050331001367\", \"invoice_issuer\": \"Willy PLONG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050398Osteopathe_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"600001101\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marl\\u00e8ne FACQUET\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"beneficiary_name\": \"Esteve Jerome\", \"beneficiary_dob\": \"29-05-1989\", \"invoice_date\": \"10-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Marl\\u00e8ne FACQUET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"10-04-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"insured_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050400acte_de_naissance_Amaury_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Amaury, Marcel, Th\\u00e9odore CROYAL\", \"beneficiary_dob\": \"11-04-2024\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050403Adobe_Scan_15_avr._2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"060001393\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PARIAT Mickael\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Frongos PATRICK\", \"beneficiary_dob\": null, \"invoice_date\": \"10-04-2024\", \"security_number\": null, \"invoice_issuer\": \"PARIAT Mickael\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"10-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724399050413Facture_Osteo_Nathan_15_04_24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940009566\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"L\\u00e9a COTTARD\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUPUY Nathan\", \"beneficiary_dob\": \"30-06-2007\", \"invoice_date\": \"15-04-2024\", \"security_number\": null, \"invoice_issuer\": \"L\\u00e9a COTTARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031238IMG_20240417_161859.jpg_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"870000197\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florent MILLE\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"MOYON Christophe\", \"insured_dob\": null, \"beneficiary_name\": \"MOYON Christophe\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2024\", \"security_number\": \"168029408000153\", \"invoice_issuer\": \"Florent MILLE\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031280Facture_Osteopathe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"130024854\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Willi Martin\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Serge MARTIN\", \"insured_dob\": \"18-04-1958\", \"beneficiary_name\": \"Serge MARTIN\", \"beneficiary_dob\": \"18-04-1958\", \"invoice_date\": null, \"security_number\": \"158049935078097\", \"invoice_issuer\": \"Willi Martin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031283ReleveMensuelDecembre2023_1_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031283ReleveMensuelDecembre2023_1_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"insured_name\": \"MORICE ERIC\", \"security_number\": \"158094415201327\", \"beneficiary_name\": \"MORICE ERIC\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [], \"insured_dob\": null, \"beneficiary_dob\": null, \"doctor_name\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031293Facture_HATCHERIANXavier_20240417IIKM_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 15.0, \"bill_paid\": true, \"amount_paid\": 15.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HATCHERIAN Xavier\", \"beneficiary_dob\": \"10-04-1984\", \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"INSTITUT TOULOUSAIN D'OSTEOPATHIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION OSTEOPATHIE\", \"quantity\": 1, \"date_of_service\": \"03-04-2024\", \"mandatory_coverage\": null, \"amount\": 15.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031296osteopathe_Aline__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"620005181\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Swann LE LAY\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lucciante Aline\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Swann LE LAY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"12-04-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/17244250313092024-03-00084_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chiropractie\", \"adeli_number\": \"110003282\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"David BERNARD\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Samantha MURILLO\", \"beneficiary_dob\": \"25-04-1978\", \"invoice_date\": \"29-03-2024\", \"security_number\": null, \"invoice_issuer\": \"David BERNARD (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de chiropraxie\", \"quantity\": 1, \"date_of_service\": \"29-03-2024\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031313MEDECINE_DOUCE_TOMASSELLI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940010424\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ameline CHANAL\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme TOMASELLI Chiara\", \"beneficiary_dob\": null, \"invoice_date\": \"18-01-2024\", \"security_number\": null, \"invoice_issuer\": \"Ameline CHANAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-01-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031349MEDECINE_DOUCE_2_TOMASSELLI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940010424\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ameline CHANAL\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mme TOHASSELLI Chiara\", \"beneficiary_dob\": null, \"invoice_date\": \"21-09-2023\", \"security_number\": null, \"invoice_issuer\": \"Ameline CHANAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"21-09-2023\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031383Osteopathe_D.O_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"940011794\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Manon VILPELLET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DAL Caroline\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Manon VILPELLET (E.I)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031396MEDECINE_DOUCE_3_TOMASSELLI_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950004457\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Meryl CORINTHE\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TOMASSELI\", \"beneficiary_dob\": null, \"invoice_date\": \"18-01-2023\", \"security_number\": null, \"invoice_issuer\": \"Meryl CORINTHE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation en ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"18-01-2023\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031408Facture_psy_fevrier_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"769314584\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Carpentier Morgane\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maxime MALMANCHE\", \"beneficiary_dob\": null, \"invoice_date\": \"21-02-2024\", \"security_number\": null, \"invoice_issuer\": \"Carpentier Morgane - Psychologue\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi Psychologique - Entretien th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031411Facture_psy_avril_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"769314584\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Morgane Carpentier\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maxime MALMANCHE\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Carpentier Morgane - Psychologue\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi Psychologique - Entretien th\\u00e9rapeutique\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031414recu-fac-2024-04-002327_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"300007903\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emma SAINT-JALMES\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POUGARY L\\u00e9a\", \"beneficiary_dob\": \"04-10-1996\", \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'ost\\u00e9opathie de VALMEDICA (N\\u00eemes)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Premi\\u00e8re consultation - Adulte\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172442503141724-04-08_recu_kine_Suz_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Kin\\u00e9sith\\u00e9rapie\", \"adeli_number\": \"307010538\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIECHEGUT LAURENT\", \"total_billed\": 66.12, \"bill_paid\": true, \"amount_paid\": 26.44, \"mandatory_coverage\": 39.68, \"complementary_coverage\": null, \"client_part\": 26.44, \"remaining_payment\": 0.0, \"insured_name\": \"PRAD JEAN-CLAUDE\", \"insured_dob\": null, \"beneficiary_name\": \"PRAD SUZANNE\", \"beneficiary_dob\": \"13-07-1948\", \"invoice_date\": \"08-04-2024\", \"security_number\": \"1460634032003\", \"invoice_issuer\": \"PIECHEGUT LAURENT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"VSM 7.48\", \"quantity\": 4, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031419Facture_Osteo_20240417_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"910000553\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Etienne FERNANDEZ\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Berard Terrini\", \"insured_dob\": null, \"beneficiary_name\": \"Berard Terrini\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": \"136127511418721\", \"invoice_issuer\": \"Etienne FERNANDEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"acte ost\\u00e9opathique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425031429IMG_20240417_0001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"490005030\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Margot MILLESCAMPS\", \"total_billed\": 50, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie-Claude VALESINI\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"El Marine Levoyer\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Ost\\u00e9opathique\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079170Osteo_12-04-24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"310010392\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J\\u00e9r\\u00f4me PONS\", \"total_billed\": 75, \"bill_paid\": true, \"amount_paid\": 75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHIARADIA Patrick\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Mr J\\u00e9r\\u00f4me PONS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"12-04-2024\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/172442507917415_03_24-charton-isabelle_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"700000060\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Koloszar\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Isabelle CHARTON\", \"insured_dob\": \"09-07-1975\", \"beneficiary_name\": \"Isabelle CHARTON\", \"beneficiary_dob\": \"09-07-1975\", \"invoice_date\": \"15-03-2024\", \"security_number\": \"275077031102035\", \"invoice_issuer\": \"Koloszar\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"15-03-2024\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079181FACTURE_PSY_PAUL_AVRIL_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"779302462\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-C\\u00e9line Ballet\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Paul MARCHAL\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Marie-C\\u00e9line Ballet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"13-04-2024\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079184Facture_Osteopathe_Brousseau_sader_17avr24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"440012771\", \"rpps_number\": \"10101759800\", \"doctor_name\": \"BROUSSEAU PHILIPPE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"insured_name\": \"Sader Maryonne\", \"insured_dob\": \"25-10-1949\", \"invoice_date\": \"17-04-2024\", \"security_number\": \"249107858221096\", \"invoice_issuer\": \"BROUSSEAU PHILIPPE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance d'ost\\u00e9opathie\", \"date_of_service\": \"17-04-2024\", \"amount\": 65}], \"beneficiary_name\": \"Sader Maryonne\", \"beneficiary_dob\": \"25-10-1949\", \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079186Scan0013_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"660004888\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"E.I Oc\\u00e9ane FOURCADE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SANCHO S\\u00e9basti\", \"beneficiary_dob\": null, \"invoice_date\": \"13-04-2024\", \"security_number\": null, \"invoice_issuer\": \"E.I Oc\\u00e9ane FOURCADE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"13-04-2024\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079207douilly_6_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Radiologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr Jean-Michel SVERZUT\", \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Louis LEDRU\", \"beneficiary_dob\": \"08-05-1957\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Centre Cardiologique du Nord\", \"currency\": null, \"items\": [{\"description\": \"\\u00c9CHOGRAPHIE DE LA CHEVILLE GAUCHE\", \"quantity\": null, \"date_of_service\": \"11-04-2024\", \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079210douilly_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"M\\u00e9decine G\\u00e9n\\u00e9rale\", \"adeli_number\": \"921045613\", \"rpps_number\": \"10100941052\", \"doctor_name\": \"Docteur BERREBY Ilan\", \"insured_name\": null, \"invoice_date\": \"09-04-2024\", \"invoice_issuer\": \"Docteur BERREBY Ilan\", \"items\": [], \"beneficiary_name\": \"M. Louis LEDRU\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079214image16-04-2024-173146-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"950003806\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Morgane LAUDIC\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Elena Petit\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Morgane LAUDIC\", \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079218Facture_osteo_DOUCHET_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"070000724\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Virginie CAPARROS GUILLET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Douchet Laurent\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"Virginie CAPARROS GUILLET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079236MM_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologue\", \"adeli_number\": \"449319763\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lucas BINI\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marina MATIAS\", \"beneficiary_dob\": \"31-08-1999\", \"invoice_date\": \"17-04-2024\", \"security_number\": null, \"invoice_issuer\": \"EI Lucas BINI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"15-04-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079259osteo_22_mars_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"830002192\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Olivier AUBERT\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DETALLE Sandrine\", \"beneficiary_dob\": null, \"invoice_date\": \"22-03-2024\", \"security_number\": null, \"invoice_issuer\": \"Olivier AUBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Une s\\u00e9ance d'ost\\u00e9opathie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079263JACOB_Francois_2024-111_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1724425079263JACOB_Francois_2024-111_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"invoice_number\": \"2024-111\", \"profession\": \"Ost\\u00e9opathe\", \"adeli_number\": \"670007087\", \"doctor_name\": \"CHARLES Fanny\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"insured_name\": null, \"invoice_date\": \"16-04-2024\", \"invoice_issuer\": \"Mme CHARLES Fanny\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ost\\u00e9opathique\", \"date_of_service\": \"16-04-2024\", \"amount\": 55}], \"beneficiary_name\": \"JACOB Fran\\u00e7ois\", \"insured_dob\": null, \"beneficiary_dob\": null, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/Sans titre_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Medecine G\\u00e9n\\u00e9rale\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DOCTEUR GHEZ STEPHANE\", \"total_billed\": 26.5, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"GASMI Christophe\", \"insured_dob\": \"18-08-1993\", \"beneficiary_name\": \"GASMI Christophe\", \"beneficiary_dob\": \"18-08-1993\", \"invoice_date\": \"17-04-2024\", \"security_number\": \"193080608849680\", \"invoice_issuer\": \"DOCTEUR GHEZ STEPHANE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"G8\", \"quantity\": null, \"date_of_service\": \"17-04-2024\", \"mandatory_coverage\": null, \"amount\": 26.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/doc_print_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychomotricit\\u00e9\", \"adeli_number\": \"629602582\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. Ludovic Sant\", \"total_billed\": 250.0, \"bill_paid\": true, \"amount_paid\": 250.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Ma\\u00ebl LAMBERT\", \"beneficiary_dob\": \"30-10-2017\", \"invoice_date\": \"08-04-2024\", \"security_number\": null, \"invoice_issuer\": \"M. Ludovic Sant (E.I), Psychomotricien D.E.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Bilan psychomoteur\", \"quantity\": 1, \"date_of_service\": \"26-02-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Passation des \\u00e9preuves de bilan\", \"quantity\": 1, \"date_of_service\": \"11-03-2024\", \"mandatory_coverage\": null, \"amount\": 45.0}, {\"description\": \"Passation des \\u00e9preuves de bilan\", \"quantity\": 1, \"date_of_service\": \"15-03-2024\", \"mandatory_coverage\": null, \"amount\": 45.0}, {\"description\": \"Synth\\u00e8se et entretien de bilan\", \"quantity\": 1, \"date_of_service\": \"08-04-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c6013936-5850-4027-9f61-32c30486e6d5_optique_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10004069281\", \"rpps_number\": \"10004069281\", \"finess_number\": null, \"doctor_name\": \"BOITTE JEAN-PIERRE\", \"total_billed\": 92.77, \"bill_paid\": true, \"amount_paid\": 42.18, \"mandatory_coverage\": 50.59, \"complementary_coverage\": 0.0, \"client_part\": 42.18, \"remaining_payment\": null, \"insured_name\": \"SOMOR CHRISTELLE\", \"insured_dob\": \"07-12-1972\", \"beneficiary_name\": \"SOMOR CHRISTELLE\", \"beneficiary_dob\": \"07-12-1972\", \"invoice_date\": \"23-05-2025\", \"security_number\": \"27212930902347\", \"invoice_issuer\": \"SELARL LECALINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": 41.59, \"amount\": 76.26}, {\"description\": \"ADI\", \"quantity\": null, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": 9, \"amount\": 16.51}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/92340ad4-27cf-476b-a842-051d0b6a2c9f_IMG_20250619_231842_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"621852599\", \"rpps_number\": \"10100962827\", \"finess_number\": null, \"doctor_name\": \"VANGHELUWE VIRGINIE\", \"total_billed\": 59.47, \"bill_paid\": true, \"amount_paid\": 59.47, \"mandatory_coverage\": 27.63, \"complementary_coverage\": 11.84, \"client_part\": 20.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BACLET Louis\", \"beneficiary_dob\": \"05-11-2014\", \"invoice_date\": \"14-06-2025\", \"security_number\": \"2860462126112\", \"invoice_issuer\": \"CENTRE MEDICAL OPHTALMOLOGlQUE POINT VISION CALAIS-MARCK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 27.63, \"amount\": 59.47}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d4f310c8-3bcf-4bb5-bf53-27ad74d66c1e_FACTURE_OPTICIEN_OPHTALMO_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d4f310c8-3bcf-4bb5-bf53-27ad74d66c1e_FACTURE_OPTICIEN_OPHTALMO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131025397\", \"rpps_number\": \"10100555415\", \"finess_number\": null, \"doctor_name\": \"Dr Abdurraouf MAHFUD ALI\", \"total_billed\": 72.18, \"bill_paid\": true, \"amount_paid\": 72.18, \"mandatory_coverage\": 20.43, \"complementary_coverage\": 8.75, \"client_part\": 43.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"KABARADJIAN Elodie\", \"beneficiary_dob\": \"14-08-1983\", \"invoice_date\": \"04-06-2025\", \"security_number\": \"2830813055470\", \"invoice_issuer\": \"CABINET MEDICAL OPHTALMOLOGIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 20.43, \"amount\": 72.18}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b0c00bc9-adf8-4972-8350-a2a51177265a_bill_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10003944641\", \"finess_number\": \"421805318\", \"doctor_name\": \"COHEN Gilbert\", \"total_billed\": 44.0, \"bill_paid\": true, \"amount_paid\": 44.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. CADET Jonathan\", \"beneficiary_dob\": null, \"invoice_date\": \"11-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de l'Horme\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY8\", \"quantity\": 1, \"date_of_service\": \"11-06-2025\", \"mandatory_coverage\": null, \"amount\": 20.8}, {\"description\": \"Hors Nomenclature\", \"quantity\": 1, \"date_of_service\": \"11-06-2025\", \"mandatory_coverage\": null, \"amount\": 23.2}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b5ecf45a-00cd-45a8-900d-6a4fc90a79a4_ZOPIE_CORPETTI_CAROLE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10002331949\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZOPIE CORRETTI CAROLE\", \"total_billed\": 113.44, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": 63.34, \"complementary_coverage\": 0.0, \"client_part\": 50.0, \"remaining_payment\": null, \"insured_name\": \"GLENAC JOSE\", \"insured_dob\": \"19-02-1973\", \"beneficiary_name\": \"GLENAC JOSE\", \"beneficiary_dob\": \"19-02-1973\", \"invoice_date\": \"14-03-2025\", \"security_number\": \"173029741140365\", \"invoice_issuer\": \"ZOPIE CORPETTI CAROLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": 1, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": 35, \"amount\": 62.63}, {\"description\": \"ATM\", \"quantity\": 1, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": 10.34, \"amount\": 18.5}, {\"description\": \"ATM\", \"quantity\": 1, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": 18, \"amount\": 32.21}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4bcfbcdf-444a-49a5-8240-ac77a7b378aa_IMG_20250612_194355_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"971745567\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ALBRIEUX MAGALI\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 38.65, \"mandatory_coverage\": 21.35, \"complementary_coverage\": null, \"client_part\": 38.65, \"remaining_payment\": null, \"insured_name\": \"DIBLAR REMY\", \"insured_dob\": \"07-07-1986\", \"beneficiary_name\": \"DIBLAR REMY\", \"beneficiary_dob\": \"07-07-1986\", \"invoice_date\": \"12-06-2025\", \"security_number\": \"1860797416099\", \"invoice_issuer\": \"Madame ALBRIEUX MAGALI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 ATM 1\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": 21.35, \"amount\": 60.0}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d9e1cb2a-b44b-4cc8-b07a-030ddf3acd28_Ordonnance_lentilles_Mary_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4d989506-6dba-451c-97ab-6c371618298f_8089465_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 31.8, \"bill_paid\": true, \"amount_paid\": 31.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Charlene BRUSSET\", \"beneficiary_dob\": null, \"invoice_date\": \"05-06-2025\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PureVision 2 (6) (Sph\\u00e9rique)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1eea4521-e98f-4f41-911b-07ffaf3b52a0_Facture_MGP_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10100538304\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MASCALI REMI\", \"total_billed\": 72.63, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": 27.63, \"complementary_coverage\": 0.0, \"client_part\": 45.0, \"remaining_payment\": 0.0, \"insured_name\": \"CANVOT ROMY\", \"insured_dob\": \"10-02-2015\", \"beneficiary_name\": \"JOURDAIN CANVOT KIMBERLY\", \"beneficiary_dob\": null, \"invoice_date\": \"03-06-2025\", \"security_number\": \"282037512009507\", \"invoice_issuer\": \"SELARL DES DRS DARUGAR ET MASCALI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"03-06-2025\", \"mandatory_coverage\": 18.64, \"amount\": 49.0}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"03-06-2025\", \"mandatory_coverage\": 8.99, \"amount\": 23.63}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131758898\", \"rpps_number\": \"10101230869\", \"finess_number\": null, \"doctor_name\": \"Dr HUGO Juliette\", \"total_billed\": 138.59, \"bill_paid\": true, \"amount_paid\": 78.81, \"mandatory_coverage\": 59.78, \"complementary_coverage\": 0.0, \"client_part\": 78.81, \"remaining_payment\": null, \"insured_name\": \"DAGOREAU MARIE-CATHERINE\", \"insured_dob\": \"02-02-1970\", \"beneficiary_name\": \"DAGOREAU MARIE-CATHERINE\", \"beneficiary_dob\": \"02-02-1970\", \"invoice_date\": \"03-06-2025\", \"security_number\": \"270023155503430\", \"invoice_issuer\": \"SELARL DU DOCTEUR JULIETTE HUGO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ADE\", \"quantity\": null, \"date_of_service\": \"03-06-2025\", \"mandatory_coverage\": 40.38, \"amount\": 93.62}, {\"description\": \"ADE\", \"quantity\": null, \"date_of_service\": \"03-06-2025\", \"mandatory_coverage\": 19.4, \"amount\": 44.97}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/68f79cfb-bc94-42fa-b7d0-2406f95f2327_Devis_lunettes_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/68f79cfb-bc94-42fa-b7d0-2406f95f2327_Devis_lunettes_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9c2763f2-fb0c-4fe4-be38-6f94fba64162_optical_facture_lunettes_apres_accident_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9c2763f2-fb0c-4fe4-be38-6f94fba64162_optical_facture_lunettes_apres_accident_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"90245843\", \"rpps_number\": \"10002309630\", \"finess_number\": null, \"doctor_name\": \"LIBESKIND Frederique\", \"total_billed\": 551.58, \"bill_paid\": true, \"amount_paid\": 276.49, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 275.0, \"client_part\": 276.49, \"remaining_payment\": 0.0, \"insured_name\": \"AKNIN ROSETTE\", \"insured_dob\": \"12-09-1960\", \"beneficiary_name\": \"AKNIN ROSETTE\", \"beneficiary_dob\": \"12-09-1960\", \"invoice_date\": \"23-04-2025\", \"security_number\": \"260999280405530\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PAC - Level PP - LE2037 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 79.0}, {\"description\": \"BBGR - Nikon P.Clear 15 1.67 Blu XtrNG SCCnextUV - 70/75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 236.29}, {\"description\": \"BBGR - Nikon P.Clear 15 1.67 Blu XtrNG SCCnextUV - 70/75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 236.29}, {\"description\": \"Lunettes BONDET - NOCLE - SUREAU Tradutionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 69.0}, {\"description\": \"BBGR - PACK CONF L INFINI 1.5 BLANC - 70/75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 0.0}, {\"description\": \"BBGR - PACK CONF L INFINI 1.5 BLANC - 70/75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 0.0}], \"is_handwriting\": false, \"is_rotated\": false, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e4c7735f-0479-4432-820e-fee0cb1f1c82_Facture_0276202505750501032_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Antoine HANNAPPE\", \"total_billed\": 40.2, \"bill_paid\": true, \"amount_paid\": 40.2, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"WAMBRE Melanie\", \"beneficiary_dob\": null, \"invoice_date\": \"29-05-2025\", \"security_number\": \"2931162126286\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique BRUAY LA BUISSIERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille - AIR OPTIX plus hydraglyde 3L + Incroto Dur\\u00e9e de vie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 15.4}, {\"description\": \"Lentille - AIR OPTIX plus hydraglyde for astigmatism 3L + Incroto Dur\\u00e9e de vie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 24.8}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5550f57b-87f9-49dc-966d-aebef8d6c1d0_facture-chaulan-eliane-20250523-1531_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": \"1010409674\", \"finess_number\": null, \"doctor_name\": \"Maxime UNIA\", \"total_billed\": 291.91, \"bill_paid\": true, \"amount_paid\": 146.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"CHAUPLAN Eliane\", \"insured_dob\": \"29-07-1943\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"23-05-2025\", \"security_number\": \"243078309040203\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Obligation occlusale\", \"quantity\": 1, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": null, \"amount\": 30.47}, {\"description\": \"Obligation occlusale\", \"quantity\": 1, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": null, \"amount\": 30.47}, {\"description\": \"Obligation occlusale\", \"quantity\": 1, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": null, \"amount\": 30.47}, {\"description\": \"D\\u00e9tartrage/polissage des dents\", \"quantity\": 1, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": null, \"amount\": 28.92}, {\"description\": \"D\\u00e9tartrage/polissage des dents\", \"quantity\": 1, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": null, \"amount\": 14.46}, {\"description\": \"Avulsion dent permanente\", \"quantity\": 1, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": null, \"amount\": 39.0}, {\"description\": \"D\\u00e9contamination et comblement de l'alv\\u00e9ole LASER assist\\u00e9\", \"quantity\": 1, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fa25423e-b79c-48f9-89ae-32ffd95e3101_Facture_Ophtalmologie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"STANESCU DINU\", \"total_billed\": 129.32, \"bill_paid\": true, \"amount_paid\": 129.32, \"mandatory_coverage\": 47.82, \"complementary_coverage\": null, \"client_part\": 81.5, \"remaining_payment\": null, \"insured_name\": \"ALI BEY ZAIHA\", \"insured_dob\": \"28-01-1966\", \"beneficiary_name\": \"ALI BEY ZAIHA\", \"beneficiary_dob\": \"28-01-1966\", \"invoice_date\": null, \"security_number\": \"2660159512296\", \"invoice_issuer\": \"STANESCU DINU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 47.82, \"amount\": 129.32}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Soins hospitalier\", \"adeli_number\": \"10002331949\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZOPIE CAROLE\", \"total_billed\": 92.4, \"bill_paid\": true, \"amount_paid\": 27.72, \"mandatory_coverage\": 64.68, \"complementary_coverage\": 0.0, \"client_part\": 27.72, \"remaining_payment\": null, \"insured_name\": \"LEROUX BRUNELLA\", \"insured_dob\": \"19-05-1981\", \"beneficiary_name\": \"LEROUX BRUNELLA\", \"beneficiary_dob\": \"19-05-1981\", \"invoice_date\": \"23-05-2025\", \"security_number\": \"281059741421164\", \"invoice_issuer\": \"ZOPIE CORPETTI CAROLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": 35, \"amount\": 50.0}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": 20.68, \"amount\": 12.86}, {\"description\": \"ADI\", \"quantity\": null, \"date_of_service\": \"23-05-2025\", \"mandatory_coverage\": 9, \"amount\": 12.86}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cdea6c95-be16-4d2b-ba78-95ef182d8e63_IMG_20250523_084341_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ALAME MAHMOUD\", \"total_billed\": 171.3, \"bill_paid\": true, \"amount_paid\": 25.0, \"mandatory_coverage\": 146.3, \"complementary_coverage\": 0.0, \"client_part\": 25.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DIE DENIS\", \"beneficiary_dob\": \"26-11-1954\", \"invoice_date\": \"07-04-2025\", \"security_number\": \"154110832809635\", \"invoice_issuer\": \"ALAME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGNP001 1 0\", \"quantity\": null, \"date_of_service\": \"07-04-2025\", \"mandatory_coverage\": 146.3, \"amount\": 171.3}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4d9dad1f-732c-4580-8899-438c1eae07a3_FACTURE_LUNETTES_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GRIGIER Pierre\", \"total_billed\": 528.0, \"bill_paid\": true, \"amount_paid\": 178.85, \"mandatory_coverage\": 0.15, \"complementary_coverage\": 349.0, \"client_part\": 178.85, \"remaining_payment\": null, \"insured_name\": \"Yves BAPTISTE\", \"insured_dob\": \"29-04-1959\", \"beneficiary_name\": \"Yves BAPTISTE\", \"beneficiary_dob\": \"29-04-1959\", \"invoice_date\": \"15-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Krys\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Marchon\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.05, \"amount\": 138.0}, {\"description\": \"Codir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.05, \"amount\": 195.0}, {\"description\": \"QUATTRO UV Antireflet facile \\u00e0 entretenir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Codir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.05, \"amount\": 195.0}, {\"description\": \"QUATTRO UV Antireflet facile \\u00e0 entretenir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"is_handwriting\": false, \"is_rotated\": false, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3c00ec8d-24ad-46c1-aa12-08f7cba34328_17478452286276479334092225478882_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10003439063\", \"rpps_number\": \"10003439063\", \"finess_number\": null, \"doctor_name\": \"FOGLIARINI Richard-Vitton\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 32.48, \"mandatory_coverage\": 27.52, \"complementary_coverage\": 0.0, \"client_part\": 32.48, \"remaining_payment\": null, \"insured_name\": \"LORANG LANA\", \"insured_dob\": \"22-01-2007\", \"beneficiary_name\": \"LORANG LANA\", \"beneficiary_dob\": \"22-01-2007\", \"invoice_date\": \"20-05-2025\", \"security_number\": \"207011300129264\", \"invoice_issuer\": \"CABINET DU DR CELINE FOGLIARINI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"20-05-2025\", \"mandatory_coverage\": 20.08, \"amount\": 43.85}, {\"description\": \"ADI\", \"quantity\": null, \"date_of_service\": \"20-05-2025\", \"mandatory_coverage\": 7.44, \"amount\": 16.15}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3d924a73-2617-4758-a677-2374ab3fb3df_CLIENT_DOCUMENT_DE_TRAVAIL1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b7df45bd-a142-4cb1-bb00-69122c5b405a_IMG20250520184626_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"661021444\", \"rpps_number\": \"10003240008\", \"finess_number\": null, \"doctor_name\": \"MALET Didier\", \"total_billed\": 594.8, \"bill_paid\": true, \"amount_paid\": 330.65, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 264.06, \"client_part\": 330.65, \"remaining_payment\": null, \"insured_name\": \"MALECAMP CELIA\", \"insured_dob\": \"31-07-2015\", \"beneficiary_name\": \"MALECAMP CELIA\", \"beneficiary_dob\": \"31-07-2015\", \"invoice_date\": \"20-05-2025\", \"security_number\": \"286036604901236\", \"invoice_issuer\": \"OPTIC 2000 CERET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AudiOptic Trade Services - COSMOPOLITAN - CM2409 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 139.0}, {\"description\": \"HOYA LENS FRANCE - MIYOSMART 1.59 UV2.0 SMTC - 70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 244.0}, {\"description\": \"HOYA LENS FRANCE - MIYOSMART 1.59 UV2.0 SMTC - 70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 244.0}, {\"description\": \"AudiOptic Trade Services - JAW - A2303 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 129.0}, {\"description\": \"ESSILOR - Unifocal Generique 1.5 Solaire - 70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 21.0}, {\"description\": \"ESSILOR - Unifocal Generique 1.5 Solaire - 70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 21.0}], \"is_handwriting\": false, \"is_rotated\": false, \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9cf59e81-4e88-4355-b56b-796a8d472c49_Scan_2025-05-20_06.36.44_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9cf59e81-4e88-4355-b56b-796a8d472c49_Scan_2025-05-20_06.36.44_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"970412664\", \"doctor_name\": null, \"total_billed\": 21.76, \"bill_paid\": true, \"amount_paid\": 8.7, \"mandatory_coverage\": 13.06, \"complementary_coverage\": 0.0, \"client_part\": 8.7, \"remaining_payment\": null, \"insured_name\": \"GIGAN JEANICK\", \"insured_dob\": \"29-10-1980\", \"beneficiary_name\": \"GIGAN JEANICK\", \"beneficiary_dob\": \"29-10-1980\", \"invoice_date\": \"20-05-2025\", \"security_number\": \"180109741426592\", \"invoice_issuer\": \"CDS VISION CLAIRE - SAINT PIERRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": \"20-05-2025\", \"mandatory_coverage\": 13.06, \"amount\": 21.76}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"970412664\", \"doctor_name\": \"AMARA Nadia\", \"total_billed\": 32.56, \"bill_paid\": true, \"amount_paid\": 9.77, \"mandatory_coverage\": 22.79, \"complementary_coverage\": 0.0, \"client_part\": 9.77, \"remaining_payment\": null, \"insured_name\": \"GIGAN JEANICK\", \"insured_dob\": \"29-10-1980\", \"beneficiary_name\": \"GIGAN JEANICK\", \"beneficiary_dob\": \"29-10-1980\", \"invoice_date\": \"20-05-2025\", \"security_number\": \"180109741426592\", \"invoice_issuer\": \"CDS VISION CLAIRE - SAINT PIERRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"20-05-2025\", \"mandatory_coverage\": 0.0, \"amount\": 22.67}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"20-05-2025\", \"mandatory_coverage\": 0.0, \"amount\": 9.89}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b96f6ca3-3175-4cd8-a827-e886b5799867_Facture_ophtalmologue_24022025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"061750733\", \"finess_number\": null, \"doctor_name\": \"BLOISE LOUISETTE\", \"total_billed\": 127.69, \"bill_paid\": true, \"amount_paid\": 59.0, \"mandatory_coverage\": 68.69, \"complementary_coverage\": null, \"client_part\": 59.0, \"remaining_payment\": 0, \"insured_name\": \"SAIF Souad\", \"insured_dob\": \"01-12-1973\", \"beneficiary_name\": \"SAIF Souad\", \"beneficiary_dob\": \"01-12-1973\", \"invoice_date\": \"24-02-2025\", \"security_number\": \"2731213001017\", \"invoice_issuer\": \"CENTRE MEDICAL OPHTALMOLOGIQUE POINT VISION NICE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 68.69, \"amount\": 127.69}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cc3b1adf-4796-45ce-abcd-32dfa977c102_FACTURE_OPTIQUE3-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"591208038\", \"doctor_name\": \"NOUVEL LISA\", \"total_billed\": 741.0, \"bill_paid\": true, \"amount_paid\": 490.91, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 250.0, \"client_part\": 490.91, \"remaining_payment\": 0.0, \"insured_name\": \"HAZARD Angelique\", \"insured_dob\": \"16-06-1978\", \"beneficiary_name\": \"HAZARD Angelique\", \"beneficiary_dob\": \"16-06-1978\", \"invoice_date\": \"17-05-2025\", \"security_number\": \"278066930318175\", \"invoice_issuer\": \"Optique OLIVIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"GIGI - GIGI STUDIOS - CALA - 9010/020 - 52/17 Precal\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 189.0}, {\"description\": \"ESS - Essilor Inizio Orma BUC Cz Rock - 65/70 Precal\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 276.0}, {\"description\": \"ESS - Essilor Inizio Orma BUC Cz Rock - 65/70 Precal\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 276.0}, {\"description\": \"MF\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/80b79126-2cfa-41bd-83cd-81fa0efca7c3_facture_lentilles_Leila_ben_amor_REMADI__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 214.97, \"bill_paid\": true, \"amount_paid\": 214.97, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MEZIANE LEILA REMADI BEN AMOR\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"ALENSA s.r.o.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Dailies TOTAL1 Multifocal (90 lentilles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 98.99}, {\"description\": \"Dailies TOTAL1 Multifocal (90 lentilles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 98.99}, {\"description\": \"Solution Gelone 2 x 360 ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 16.99}, {\"description\": \"Cadeau: Dermacol S\\u00e9rum Raffermissant Caviar Energy 12 ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/895777f3-4478-4e8d-a616-d498850d1523_factures-lunettes_Tess_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/895777f3-4478-4e8d-a616-d498850d1523_factures-lunettes_Tess_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"172603680\", \"doctor_name\": \"GUIGUI Benjamin\", \"total_billed\": 583.0, \"bill_paid\": true, \"amount_paid\": 243.17, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 149.94, \"client_part\": 243.17, \"remaining_payment\": null, \"insured_name\": \"Tess CLAEYS\", \"insured_dob\": \"17-05-2019\", \"beneficiary_name\": \"Tess CLAEYS\", \"beneficiary_dob\": \"17-05-2019\", \"invoice_date\": \"30-04-2025\", \"security_number\": \"284036938803941\", \"invoice_issuer\": \"Krys Saintes - Cours National\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPal-236 Rue des Combattants En AFN, 1952-42 - FR-69009 Lyon. Monture et Chocelet TCA407 C70 Domain Tarte Rose Clair-647-7506801304945 Plastique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 145.0}, {\"description\": \"Hoya Lens France\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 219.0}, {\"description\": \"Hi-Vision LongLife\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Sensity 2 Gris\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CALISIZE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Hoya Lens France\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 219.0}, {\"description\": \"Hi-Vision LongLife\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Sensity 2 Gris\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CALISIZE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"172603680\", \"doctor_name\": \"GUIGUI Benjamin\", \"total_billed\": 41.0, \"bill_paid\": true, \"amount_paid\": 41.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 41.0, \"insured_name\": \"Tess CLAEYS\", \"insured_dob\": \"17-05-2019\", \"beneficiary_name\": \"Tess CLAEYS\", \"beneficiary_dob\": \"17-05-2019\", \"invoice_date\": \"30-04-2025\", \"security_number\": \"284036938803941\", \"invoice_issuer\": \"Krys Saintes - Cours National\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Codir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.0}, {\"description\": \"Codir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25.0}, {\"description\": \"GRIS 85% Verre teint\\u00e9 gris 85%\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"UV PROTECT 380 Traitement anti UV\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CAULISZE Verre pr\\u00e9-calibr\\u00e9\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Codir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 25.0}, {\"description\": \"GRIS 85% Verre teint\\u00e9 gris 85%\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"UV PROTECT 380 Traitement anti UV\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CAULISZE Verre pr\\u00e9-calibr\\u00e9\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0632d6ad-c720-45d5-bacc-7f2fcf196d60_facture_consultation_ophtalmo_delphine_avril_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131074791\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GILLES MARION\", \"total_billed\": 117.88, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": 62.89, \"complementary_coverage\": null, \"client_part\": 55.0, \"remaining_payment\": null, \"insured_name\": \"BACCINI Delphine\", \"insured_dob\": \"27-12-1972\", \"beneficiary_name\": \"BACCINI Delphine\", \"beneficiary_dob\": \"27-12-1972\", \"invoice_date\": \"10-04-2025\", \"security_number\": \"2721255545100\", \"invoice_issuer\": \"Centre Medical Ophtalmologique Point Vision Arles\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 62.89, \"amount\": 117.88}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e8e0549f-9d27-4508-9f56-1750649f9c1f_17466044538525179769049202853107_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"651802209\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SAKR Fr\\u00e9d\\u00e9ric\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 16.0, \"mandatory_coverage\": 12.48, \"complementary_coverage\": null, \"client_part\": 8.32, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DELPEYRAT Dorian\", \"beneficiary_dob\": null, \"invoice_date\": \"07-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Docteur SAKR Fr\\u00e9d\\u00e9ric\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 8\", \"quantity\": 1, \"date_of_service\": \"07-05-2025\", \"mandatory_coverage\": null, \"amount\": 20.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/78ec53bb-a4c5-469f-a4d0-f42db4cf9b11_CF_1697735_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/78ec53bb-a4c5-469f-a4d0-f42db4cf9b11_CF_1697735_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"doctor_name\": null, \"total_billed\": 64.48, \"bill_paid\": true, \"amount_paid\": 64.48, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"Bruno ZINANI\", \"insured_dob\": null, \"beneficiary_name\": \"Bruno ZINANI\", \"beneficiary_dob\": null, \"invoice_date\": \"02-04-2025\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Day Acuvue Oasys For Astigmatism 30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.94}, {\"description\": \"1 Day Acuvue Oasys For Astigmatism 30\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.94}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fa566bb1-6bb3-4b4e-9f47-0ee5052785b5_facture_MGP_Lily_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. WILLIAM BOUCENNA\", \"total_billed\": 39.0, \"bill_paid\": true, \"amount_paid\": 15.6, \"mandatory_coverage\": 23.4, \"complementary_coverage\": 0.0, \"client_part\": 15.6, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JUPIN LILY\", \"beneficiary_dob\": \"27-11-2010\", \"invoice_date\": \"31-03-2025\", \"security_number\": \"281108002123055\", \"invoice_issuer\": \"M. WILLIAM BOUCENNA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ANY 15\", \"quantity\": 1, \"date_of_service\": \"31-03-2025\", \"mandatory_coverage\": 23.4, \"amount\": 39.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/63bad7eb-92c1-426f-b64c-9aa22cb20fed_FACTURE_OPTIQUE2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"950046052\", \"doctor_name\": null, \"total_billed\": 240.0, \"bill_paid\": true, \"amount_paid\": 149.94, \"mandatory_coverage\": 0.06, \"complementary_coverage\": 90.0, \"client_part\": 149.94, \"remaining_payment\": null, \"insured_name\": \"M. CHEA Christian\", \"insured_dob\": \"06-01-1984\", \"beneficiary_name\": \"M. CHEA Christian\", \"beneficiary_dob\": \"06-01-1984\", \"invoice_date\": \"12-03-2025\", \"security_number\": \"1840175120041\", \"invoice_issuer\": \"pointOptical\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ZEISS UPH SF 153 UV Protect BP - 6S\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 120.0}, {\"description\": \"ZEISS UPH SF 153 UV Protect BP - 6S\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 120.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/62b45277-a437-48c6-af52-d1814f23434f_17460042180346042803920585280783_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"M. WILLIAM BOUCENNA\", \"total_billed\": 77.34, \"bill_paid\": true, \"amount_paid\": 35.0, \"mandatory_coverage\": 29.64, \"complementary_coverage\": 12.7, \"client_part\": 35.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MOREAU EMY\", \"beneficiary_dob\": \"03-15-2013\", \"invoice_date\": \"29-04-2025\", \"security_number\": \"283030238101893\", \"invoice_issuer\": \"M. WILLIAM BOUCENNA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGQP002\", \"quantity\": null, \"date_of_service\": \"29-04-2025\", \"mandatory_coverage\": 20.08, \"amount\": 63.68}, {\"description\": \"BGQP009\", \"quantity\": null, \"date_of_service\": \"29-04-2025\", \"mandatory_coverage\": 9.56, \"amount\": 13.66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/699e17b1-2aff-4884-af8c-126367c8ca95_lunettes_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10108800672\", \"finess_number\": \"930031018\", \"doctor_name\": \"ADDALA Yakine\", \"total_billed\": 262, \"bill_paid\": true, \"amount_paid\": 180.4, \"mandatory_coverage\": 81.6, \"complementary_coverage\": 180.4, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Francine Pouvait\", \"beneficiary_dob\": null, \"invoice_date\": \"26-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Lunettes Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LPT Supply Chain (6 Rue de brouque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30.0}, {\"description\": \"LPT Supply Chain (6 Rue de brouque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 17.1, \"amount\": 90.0}, {\"description\": \"LPT Supply Chain (6 Rue de brouque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 16.2, \"amount\": 90.0}, {\"description\": \"Suppl\\u00e9ment Prestation d'appairage\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.9, \"amount\": 5}, {\"description\": \"Suppl\\u00e9ment Prestation forfaitaire de la sescurit\\u00e9 sociale \", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42, \"amount\": 42}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b01c4fa3-53cb-49a7-8e24-a8f7aabe65f2_2025-04-28_consultation_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"10100929214\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NICOLO PIERRE\", \"total_billed\": 121.69, \"bill_paid\": true, \"amount_paid\": 65.19, \"mandatory_coverage\": 56.5, \"complementary_coverage\": 0.0, \"client_part\": 65.19, \"remaining_payment\": null, \"insured_name\": \"GALIA GERARD\", \"insured_dob\": \"13-09-1949\", \"beneficiary_name\": \"GALIA GERARD\", \"beneficiary_dob\": \"13-09-1949\", \"invoice_date\": \"28-04-2025\", \"security_number\": \"149099380317418\", \"invoice_issuer\": \"SELAS POM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"28-04-2025\", \"mandatory_coverage\": 33.98, \"amount\": 85.84}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"28-04-2025\", \"mandatory_coverage\": 10.04, \"amount\": 15.45}, {\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": \"28-04-2025\", \"mandatory_coverage\": 12.48, \"amount\": 20.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dc224b52-7d9a-4cda-9ac9-f12f4a7c1762_20250428102258_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": null, \"rpps_number\": \"10100361988\", \"finess_number\": null, \"doctor_name\": \"NORMA FABRE-VILA RICART\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 75.98, \"mandatory_coverage\": 44.02, \"complementary_coverage\": 75.98, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"Chantal Diaz\", \"insured_dob\": \"18-11-1961\", \"beneficiary_name\": \"Chantal Diaz\", \"beneficiary_dob\": \"18-11-1961\", \"invoice_date\": \"22-04-2025\", \"security_number\": \"261118400711325\", \"invoice_issuer\": \"NORMA FABRE-VILA RICART\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 x BZQK001, 1 x BGQP002\", \"quantity\": 1, \"date_of_service\": \"22-04-2025\", \"mandatory_coverage\": 44.02, \"amount\": 120}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1abe7c4f-d458-4575-a0e9-3d2eaa3e0787_20250423133038356_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1abe7c4f-d458-4575-a0e9-3d2eaa3e0787_20250423133038356_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"311010334\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SAKR FREDERIC\", \"total_billed\": 75.01, \"bill_paid\": true, \"amount_paid\": 75.01, \"mandatory_coverage\": 27.79, \"complementary_coverage\": null, \"client_part\": 47.22, \"remaining_payment\": null, \"insured_name\": \"BOYER COLINE\", \"insured_dob\": \"09-06-1996\", \"beneficiary_name\": \"BOYER COLINE\", \"beneficiary_dob\": \"09-06-1996\", \"invoice_date\": \"10-04-2025\", \"security_number\": \"2960631555474\", \"invoice_issuer\": \"Monsieur SAKR FREDERIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 ATM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 27.79, \"amount\": 75.01}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"311010334\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SAKR FREDERIC\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 20.8, \"mandatory_coverage\": 12.48, \"complementary_coverage\": null, \"client_part\": 8.32, \"remaining_payment\": null, \"insured_name\": \"BOYER COLINE\", \"insured_dob\": \"09-06-1996\", \"beneficiary_name\": \"BOYER COLINE\", \"beneficiary_dob\": \"09-06-1996\", \"invoice_date\": \"10-04-2025\", \"security_number\": \"2960631555474\", \"invoice_issuer\": \"Monsieur SAKR FREDERIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 27.79, \"amount\": 75.01}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/baadcf11-c68e-4774-bbaf-c7dad90b4e13_DURAND_FABIEN_facture_2025-04-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"441708534\", \"rpps_number\": \"10102028205\", \"finess_number\": null, \"doctor_name\": \"Marta DORVAULT\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 28.71, \"mandatory_coverage\": 31.29, \"complementary_coverage\": null, \"client_part\": 28.71, \"remaining_payment\": null, \"insured_name\": \"DURAND Fabien\", \"insured_dob\": \"20-05-1978\", \"beneficiary_name\": \"DURAND Fabien\", \"beneficiary_dob\": \"20-05-1978\", \"invoice_date\": \"07-04-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET D'OPHTALMOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 12.3 - BHQP00210\", \"quantity\": 1, \"date_of_service\": \"07-04-2025\", \"mandatory_coverage\": 31.29, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bebdddca-5e13-4baa-9b5a-ea26cd0e2db5_Facture_optalmologie_acquitee_du_28.03.2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10102056388\", \"finess_number\": null, \"doctor_name\": \"Anna-Maria SEUTHE\", \"total_billed\": 121.25, \"bill_paid\": true, \"amount_paid\": 78.28, \"mandatory_coverage\": 42.97, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"CHABOSSEAU CHRISTIAN\", \"insured_dob\": \"15-05-1948\", \"beneficiary_name\": \"CHABOSSEAU CHRISTIAN\", \"beneficiary_dob\": \"15-05-1948\", \"invoice_date\": \"28-03-2025\", \"security_number\": \"148055746312965\", \"invoice_issuer\": \"Centre d'Ophthalmologie franco-allemand\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BZQK00110-BLQP01010\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 121.25}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5f87d4e6-72b2-4b7f-bd9c-39eda4f7067b_8012750_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 26.8, \"bill_paid\": true, \"amount_paid\": 26.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": \"M. Jean MASSET\", \"insured_dob\": null, \"beneficiary_name\": \"M. Jean MASSET\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2025\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity (6) (Sph\\u00e9rique)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 24.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 2.9}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8f2b09a0-d20d-4a94-a70a-fda31024af49_Facture_22_04_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"781713677\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"sybrine ANDREAU\", \"total_billed\": 340.0, \"bill_paid\": true, \"amount_paid\": 135.85, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 204.06, \"client_part\": 135.85, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BLANCHOUIN CHARLYNE\", \"beneficiary_dob\": null, \"invoice_date\": \"03-04-2025\", \"security_number\": \"2050228110320\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique CHARTRES BARJOUVILLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Morture optique Carill\\u00e9 - DBY DBOF 5044 HH Havana Havana\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 140.0}, {\"description\": \"Verre - BBGR Unif 1.6H Blu X'tra UV/IR 689-6BB (TVA)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 75.0}, {\"description\": \"Verre - BBGR Unif 1.6H Blu X'tra UV/IR 674-6BB (TVA)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7ad6d85f-7245-4c05-8c79-512ed13fe0d5_lunettes_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10100998524\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LANDMAN VU Jade\", \"total_billed\": 67.63, \"bill_paid\": true, \"amount_paid\": 40.0, \"mandatory_coverage\": 28.63, \"complementary_coverage\": 0.0, \"client_part\": 40.0, \"remaining_payment\": null, \"insured_name\": \"SOURMAIL MARCEL\", \"insured_dob\": \"29-09-1956\", \"beneficiary_name\": \"SOURMAIL MARCEL\", \"beneficiary_dob\": \"29-09-1956\", \"invoice_date\": \"08-04-2025\", \"security_number\": \"156095912218956\", \"invoice_issuer\": \"IOP INSTITUT OEIL PAUPIERE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"08-04-2025\", \"mandatory_coverage\": 18.86, \"amount\": 45.63}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"08-04-2025\", \"mandatory_coverage\": 8.99, \"amount\": 22.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ed89e968-9124-4ab7-b58d-40505929eec9_doc03194120250416144711_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"660012147\", \"doctor_name\": \"JULIEN Anne\", \"total_billed\": 72.02, \"bill_paid\": true, \"amount_paid\": 21.6, \"mandatory_coverage\": null, \"complementary_coverage\": 50.42, \"client_part\": 21.6, \"remaining_payment\": 0.0, \"insured_name\": \"FONT Frederic\", \"insured_dob\": \"03-08-1975\", \"beneficiary_name\": \"FONT Frederic\", \"beneficiary_dob\": \"03-08-1975\", \"invoice_date\": \"15-04-2025\", \"security_number\": \"1750866136026\", \"invoice_issuer\": \"CDS De L Hotel De Ville\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OCT+FO\", \"quantity\": 1.0, \"date_of_service\": \"15-04-2025\", \"mandatory_coverage\": null, \"amount\": 57.68}, {\"description\": \"OCT+FO\", \"quantity\": 1.0, \"date_of_service\": \"15-04-2025\", \"mandatory_coverage\": null, \"amount\": 14.34}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/14dc234b-2f2e-44d7-9dbe-65da4b000dbb_Lentilles_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 160.68, \"bill_paid\": true, \"amount_paid\": 160.68, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ABALLAIN L\\u00e9naik\", \"beneficiary_dob\": null, \"invoice_date\": \"07-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Optik Kaufman SA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ULTRA1D96PK-DIA14.2-BC8.6-PWRM1.5 Ultra One Day 90 PK DIA: 14.2 BC: 8.6 PWR: -1.5\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 160.68}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5c4353b0-55a4-4687-9744-d1a6da183abd_facture_lentilles_de_contact_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 78.98, \"bill_paid\": true, \"amount_paid\": 78.98, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maxence Lamart\", \"beneficiary_dob\": null, \"invoice_date\": \"14-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Vision Direct.fr\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AcOnPI90p 085 142 VISI -1.25 90 lentilles de contact par boite\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.91}, {\"description\": \"AcOnPI90p 085 142 VISI -1.50 90 lentilles de contact par boite\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.91}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/98e5b959-f0c4-4bc8-8eea-a03b99c10186_ReleveMensuel_Avril_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eb6204d1-23c0-42d4-a32d-7085d12ef755_facture_sophia_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eb6204d1-23c0-42d4-a32d-7085d12ef755_facture_sophia_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131048100\", \"rpps_number\": \"10100969400\", \"finess_number\": null, \"doctor_name\": \"Dr Esther HERETH\", \"total_billed\": 152, \"bill_paid\": true, \"amount_paid\": 86, \"mandatory_coverage\": 66, \"complementary_coverage\": 86, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GORINE Sophia\", \"beneficiary_dob\": \"04-06-2015\", \"invoice_date\": \"09-04-2025\", \"security_number\": \"279041305594732\", \"invoice_issuer\": \"Lunettes Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LPT Supply Chain (6 Rue de brouque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30.0}, {\"description\": \"LPT Supply Chain (6 Rue de brouque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.85, \"amount\": 32.5}, {\"description\": \"LPT Supply Chain (6 Rue de brouque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"Suppl\\u00e9ment Prestation d'appairage\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6, \"amount\": 10}, {\"description\": \"Suppl\\u00e9ment Prestation forfaitaire de la sescurit\\u00e9 sociale \", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42, \"amount\": 42}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10004433750\", \"finess_number\": \"391848611\", \"doctor_name\": \"A. Cavelius\", \"total_billed\": 44.0, \"bill_paid\": true, \"amount_paid\": 44.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROYER Ludivine\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Tavaux\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.8}, {\"description\": \"Hors Nomenclature\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 23.2}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d2fb4d18-d25e-4a3e-8668-ce03334e3a05_facture_OPTICLUDE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE JACQUES CAROLE\", \"total_billed\": 20.93, \"bill_paid\": true, \"amount_paid\": 11.72, \"mandatory_coverage\": 5.51, \"complementary_coverage\": 3.67, \"client_part\": 11.72, \"remaining_payment\": null, \"insured_name\": \"FERNANDEZ MARIE\", \"insured_dob\": null, \"beneficiary_name\": \"FERNANDEZ JACQUES\", \"beneficiary_dob\": \"26-06-2021\", \"invoice_date\": \"10-01-2025\", \"security_number\": \"23601715417607\", \"invoice_issuer\": \"Pharmacie de Port Saint P\\u00e8re\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1x20,90 OPT\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.51, \"amount\": 11.73}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10001752244\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"RABAH-BENABBAS SALIM\", \"total_billed\": 85.08, \"bill_paid\": true, \"amount_paid\": 45.53, \"mandatory_coverage\": 40.27, \"complementary_coverage\": 0.0, \"client_part\": 45.53, \"remaining_payment\": 0.0, \"insured_name\": \"HEMARIN HARRY\", \"insured_dob\": \"14-05-1994\", \"beneficiary_name\": \"HEMARIN HARRY\", \"beneficiary_dob\": \"14-05-1994\", \"invoice_date\": \"04-04-2025\", \"security_number\": \"164059712024533\", \"invoice_issuer\": \"SELARL DE MEDECIN VISIOCENTRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"04-04-2025\", \"mandatory_coverage\": 20.08, \"amount\": 46.97}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"04-04-2025\", \"mandatory_coverage\": 7.71, \"amount\": 15.32}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"04-04-2025\", \"mandatory_coverage\": 12.48, \"amount\": 8.32}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"130056534\", \"doctor_name\": \"MINIGHETTI Amelie\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 8.32, \"mandatory_coverage\": 12.48, \"complementary_coverage\": 0.0, \"client_part\": 8.32, \"remaining_payment\": 0.0, \"insured_name\": \"BUVE VAISSA\", \"insured_dob\": \"22-07-1981\", \"beneficiary_name\": \"MOREAU LOUCAS\", \"beneficiary_dob\": \"25-06-2010\", \"invoice_date\": \"08-04-2025\", \"security_number\": \"28107991346731\", \"invoice_issuer\": \"CDS OPHTALMO ET MEDICAL D'ALLAUCH\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Measuring of the acuity visuelle and of the refraction with or without dilatation\", \"quantity\": null, \"date_of_service\": \"08-04-2025\", \"mandatory_coverage\": 12.48, \"amount\": 20.8}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"130056534\", \"doctor_name\": \"MINIGHETTI Amelie\", \"total_billed\": 39.69, \"bill_paid\": true, \"amount_paid\": 11.9, \"mandatory_coverage\": 27.79, \"complementary_coverage\": 0.0, \"client_part\": 11.9, \"remaining_payment\": 0.0, \"insured_name\": \"BUVE VAISSA\", \"insured_dob\": \"22-07-1981\", \"beneficiary_name\": \"MOREAU LOUCAS\", \"beneficiary_dob\": \"25-06-2010\", \"invoice_date\": \"08-04-2025\", \"security_number\": \"28107991346731\", \"invoice_issuer\": \"CDS OPHTALMO ET MEDICAL D'ALLAUCH\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Examan du fond d'oeil par biomicrosopie avec verre de contact\", \"quantity\": null, \"date_of_service\": \"08-04-2025\", \"mandatory_coverage\": 20.08, \"amount\": 28.68}, {\"description\": \"Pachym\\u00e9trie corn\\u00e9enne\", \"quantity\": null, \"date_of_service\": \"08-04-2025\", \"mandatory_coverage\": 7.71, \"amount\": 11.01}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/14a6fe3e-c49c-4544-b960-43def15eaf83_doc00563220250401124926_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 644.16, \"bill_paid\": true, \"amount_paid\": 274.04, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 370.03, \"client_part\": 274.04, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"30-01-2025\", \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Optique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.09, \"amount\": 644.16}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6ac3dac2-9705-4daa-848b-86f960b592f6_IMG_1638_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752632182\", \"doctor_name\": \"Guillaume Cassard\", \"total_billed\": 147, \"bill_paid\": true, \"amount_paid\": 86.1, \"mandatory_coverage\": 60.9, \"complementary_coverage\": 86.1, \"client_part\": 86.1, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JADE SANCHEZ\", \"beneficiary_dob\": null, \"invoice_date\": \"20-03-2025\", \"security_number\": \"279122923212689\", \"invoice_issuer\": \"Lunettes Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30}, {\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"Suppl\\u00e9ment Prestation forfaitaire de la sescurit\\u00e9 sociale\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42, \"amount\": 42}, {\"description\": \"Glass Protect Basic\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c1e8b1d0-d7cf-4247-803e-499d700434b8_Capture_decran_2025-03-26_a_10.31.08_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 117.5, \"bill_paid\": true, \"amount_paid\": 57.5, \"mandatory_coverage\": 60.0, \"complementary_coverage\": null, \"client_part\": 57.5, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CANESTRELLI VERONIQUE\", \"beneficiary_dob\": null, \"invoice_date\": \"26-03-2025\", \"security_number\": null, \"invoice_issuer\": \"GENERALE D'OPTIQUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Equipement\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 117.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c8205dbb-3412-4367-8f8d-a9c66b2add7c_Fds_lunettes_fred_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Pharmacie\", \"adeli_number\": \"622689834\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Carballet\", \"total_billed\": 410.0, \"bill_paid\": true, \"amount_paid\": 410.0, \"mandatory_coverage\": 0.15, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ZAPLATA FREDERIC\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": 166016249810493, \"invoice_issuer\": \"PHARMACIE REMODELIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"M03\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.05, \"amount\": 50}, {\"description\": \"VM2\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.05, \"amount\": 180}, {\"description\": \"VM2\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.05, \"amount\": 180}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0785b11d-69b5-4f5f-b75d-9caf82642652_LENTILLES_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132662727\", \"doctor_name\": \"MAHFUD ALI ABDURRAOF\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 70.0, \"remaining_payment\": 0.0, \"insured_name\": \"GABRIEL Julie\", \"insured_dob\": \"22-09-1981\", \"beneficiary_name\": \"GABRIEL Julie\", \"beneficiary_dob\": \"22-09-1981\", \"invoice_date\": \"25-10-2024\", \"security_number\": \"281099403403152\", \"invoice_issuer\": \"SUDOPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE OASYS 14.0 (8.4)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}, {\"description\": \"ACUVUE OASYS FOR ASTIGMATISM 14.5\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 40.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/72bb486e-792d-45ef-b522-511ea16d56ef_IMG_5602_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE MESNIL ROUX\", \"total_billed\": 402.24, \"bill_paid\": true, \"amount_paid\": 205.99, \"mandatory_coverage\": null, \"complementary_coverage\": 196.25, \"client_part\": 205.99, \"remaining_payment\": 0, \"insured_name\": \"YON David\", \"insured_dob\": null, \"beneficiary_name\": \"YON David\", \"beneficiary_dob\": null, \"invoice_date\": \"24-03-2025\", \"security_number\": \"1750776540171\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique ROUEN BARENTIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - Biofinity Multifocal D (6) Dur\\u00e9e de vie Mensuelle (6 mois) de 6 Lentilles (PU 61.84\\u20ac) Souple/Coopervision/Boite De 6 Lentilles (TVA 20.00\\u20ac)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 123.68}, {\"description\": \"Lentille G - Biofinity Multifocal Toric D (6) Dur\\u00e9e de vie Mensuelle (6 mois) de 6 Lentilles (PU 139.26\\u20ac) Souple/Coopervision/Boite De 6 Lentilles (TVA 20.00\\u20ac)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 278.56}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e61341e0-47f5-44f0-afdd-b151b4ceb5aa_DEPASSEMENT_FACTURE_IVANOV_R_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"840022461\", \"doctor_name\": \"Dr.BUNBELIS Tomas\", \"total_billed\": 42.0, \"bill_paid\": true, \"amount_paid\": 12.6, \"mandatory_coverage\": 29.4, \"complementary_coverage\": 0.0, \"client_part\": 12.6, \"remaining_payment\": 0.0, \"insured_name\": \"GENEIX VERONIQUE\", \"insured_dob\": \"17-06-1964\", \"beneficiary_name\": \"IVANOV REMI\", \"beneficiary_dob\": \"04-01-2011\", \"invoice_date\": \"12-02-2025\", \"security_number\": \"264066311321986\", \"invoice_issuer\": \"CDS OPHTALMOLOGlQUE D'ORANGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BQQP002\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": 20.08, \"amount\": 28.68}, {\"description\": \"BQP002\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": 9.32, \"amount\": 13.32}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0007983c-1e48-4197-87d8-9291c7928688_LUNETTE_IVANOV_REMI_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0007983c-1e48-4197-87d8-9291c7928688_LUNETTE_IVANOV_REMI_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"842607459\", \"rpps_number\": null, \"finess_number\": \"840022461\", \"doctor_name\": \"BUBNELIS Tomas\", \"total_billed\": 175.0, \"bill_paid\": true, \"amount_paid\": 69.97, \"mandatory_coverage\": 13.53, \"complementary_coverage\": 91.5, \"client_part\": 69.97, \"remaining_payment\": 0.0, \"insured_name\": \"GENEIX VERONIQUE\", \"insured_dob\": \"04-06-1964\", \"beneficiary_name\": \"REMI IVANOV\", \"beneficiary_dob\": \"04-01-2011\", \"invoice_date\": \"13-02-2025\", \"security_number\": \"264066311321986\", \"invoice_issuer\": \"CSO OPHTALMOLOGlQUE D'ORANGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Monture\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 100.0}, {\"description\": \"Verre droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"Verre gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"842607459\", \"rpps_number\": null, \"finess_number\": \"840022461\", \"doctor_name\": \"BUBNELIS Tomas\", \"total_billed\": 42.0, \"bill_paid\": true, \"amount_paid\": 12.6, \"mandatory_coverage\": 29.4, \"complementary_coverage\": 0.0, \"client_part\": 12.6, \"remaining_payment\": 0.0, \"insured_name\": \"GENEIX VERONIQUE\", \"insured_dob\": \"04-06-1964\", \"beneficiary_name\": \"REMI IVANOV\", \"beneficiary_dob\": \"04-01-2011\", \"invoice_date\": \"13-02-2025\", \"security_number\": \"264066311321986\", \"invoice_issuer\": \"CDS OPHTALMOLOGIQUE D'ORANGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGQP002\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": 20.08, \"amount\": 28.68}, {\"description\": \"BJQP002\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": 9.32, \"amount\": 13.33}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6ee5b983-1954-4a68-9caf-1ea493b5dad4_IMG_6731_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 965.0, \"bill_paid\": true, \"amount_paid\": 714.91, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 250.0, \"client_part\": 714.91, \"remaining_payment\": null, \"insured_name\": \"CHEVALIER PHILIPPE\", \"insured_dob\": \"17-10-1966\", \"beneficiary_name\": \"CHEVALIER PHILIPPE\", \"beneficiary_dob\": \"17-10-1966\", \"invoice_date\": \"27-02-2025\", \"security_number\": \"166102636212431\", \"invoice_issuer\": \"Optic2000\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ETNIA EYEWEAR CULTURE SL - Etnia Barcelona - JARAMA - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 209.0}, {\"description\": \"Essilor intl - Varilux Liberty 3 Orma EPS Cz Sapph - 55/60\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 348.5}, {\"description\": \"Essilor intl - Varilux Liberty 3 Orma EPS Cz Sapph - 55/60\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 348.5}, {\"description\": \"Assuarance r\\u00e9sultat PAVP\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 59.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2da98e03-19d0-430e-90da-a09706de143b_Facture_Lentilles_Stephanie_Marchenay__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NOCHEZ Yannick\", \"total_billed\": 18600, \"bill_paid\": true, \"amount_paid\": 18600, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"Marchenay Stephanie\", \"insured_dob\": null, \"beneficiary_name\": \"Marchenay Stephanie\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Optique Lafayette Pirae\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et de votre suivi\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Menicon - MENICON PREMIO - 14/8.3 (Biobd\\u00e9bdomat\\u00e9riques) - BOITE DE 6\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 9300}, {\"description\": \"Menicon - MENICON PREMIO - 14/8.3 (Biobd\\u00e9bdomat\\u00e9riques) - BOITE DE 6\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 9300}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ba1b499f-199f-4106-bb2c-aac62a67c235_doc00448420250313084147_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"472696541\", \"doctor_name\": \"STANCIU Voitchita\", \"total_billed\": 644.16, \"bill_paid\": true, \"amount_paid\": 644.16, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 370.03, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"MME BAPELLE ELISABETH\", \"insured_dob\": \"05-07-1968\", \"beneficiary_name\": \"MME BAPELLE ELISABETH\", \"beneficiary_dob\": \"05-07-1968\", \"invoice_date\": \"30-01-2025\", \"security_number\": \"2 68 07 94 059 005 30\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Special Eyes Distribution - LUKKAS - LK LU2015 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 143.4}, {\"description\": \"ZEISS PRG Clear 3Dve + 15 UVProtect DP - 70/75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 212.28}, {\"description\": \"ZEISS PRG Clear 3Dve + 15 UVProtect DP - 70/75\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 212.28}, {\"description\": \"OCTIKA - Osmose Ac\\u00e9tate - OSPA144 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 0.0}, {\"description\": \"ZEISS - PACK CONF PR 1.5 POLA G15 DURCI - 65/70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 38.0}, {\"description\": \"PACK CONF PR 1.5 POLA G15 DURCI - 65/70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 38.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e566a3b0-cc99-4c4d-8715-9f3be841a066_noname_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10003102083\", \"finess_number\": null, \"doctor_name\": \"VAREILLES Marc\", \"total_billed\": 44.0, \"bill_paid\": true, \"amount_paid\": 44.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"RUTIGLIANO Baptiste\", \"insured_dob\": null, \"beneficiary_name\": \"RUTIGLIANO Baptiste\", \"beneficiary_dob\": null, \"invoice_date\": \"17-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet d'Echirolles\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.08}, {\"description\": \"Hors Nomenclature\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 23.2}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4950b870-6662-4fcd-a7e9-1fc3abd0fdc9_IMG_20250321_182918_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131042632\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BURAZOVITCH JERONIMO\", \"total_billed\": 93.69, \"bill_paid\": true, \"amount_paid\": 35.14, \"mandatory_coverage\": 58.55, \"complementary_coverage\": null, \"client_part\": 35.14, \"remaining_payment\": null, \"insured_name\": \"FANISE Magali\", \"insured_dob\": \"27-07-1970\", \"beneficiary_name\": \"FANISE Magali\", \"beneficiary_dob\": \"27-07-1970\", \"invoice_date\": null, \"security_number\": \"2700713155068\", \"invoice_issuer\": null, \"currency\": null, \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 58.55, \"amount\": 93.69}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1918118d-45fa-41a4-8dc7-b6c231244edd_IMG_20250315_173044_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 57.29, \"bill_paid\": true, \"amount_paid\": 9.11, \"mandatory_coverage\": 48.18, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PEREIRA SYLVIE\", \"beneficiary_dob\": null, \"invoice_date\": \"13-03-2025\", \"security_number\": null, \"invoice_issuer\": \"PHARMACIE DES HABERGES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BORAX/AC BOR EG 12mg/18mg/ml S la 20Unid/5ml\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.61}, {\"description\": \"Honoraire simple\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 3.06}, {\"description\": \"THEALOSE S Ocul OEIL SEC MODERE SEVERE 15ML\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 12.78}, {\"description\": \"CATIONORM MULTI EMULS OPHT 10 FL\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 9.41}, {\"description\": \"DULCILARMES 1,5% COLLYR S UNIDOSE 60UNID\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 11}, {\"description\": \"Honoraire simple\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 2.04}, {\"description\": \"VISMED GEL MULTI S ocul st\\u00e9r lubrifi FL/15ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 12.78}, {\"description\": \"Honoraire mesdicament remboursable\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.61}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b7227e60-ca2d-462d-b76d-acb51b8507e3_ZOPIE_CORPETTI_CAROLE_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b7227e60-ca2d-462d-b76d-acb51b8507e3_ZOPIE_CORPETTI_CAROLE_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10002331949\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZOPIE CORPETTI CAROLE\", \"total_billed\": 82, \"bill_paid\": true, \"amount_paid\": 38.0, \"mandatory_coverage\": 44, \"complementary_coverage\": 0.0, \"client_part\": 38.0, \"remaining_payment\": null, \"insured_name\": \"GLENAC JOSE\", \"insured_dob\": \"10-11-2011\", \"beneficiary_name\": \"GLENAC JOSE\", \"beneficiary_dob\": \"10-11-2011\", \"invoice_date\": \"14-03-2025\", \"security_number\": \"179020741140369\", \"invoice_issuer\": \"Cabinet d'ophtalmologie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 35, \"amount\": 65.22}, {\"description\": \"ADI\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 9, \"amount\": 16.78}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d6b93c3d-f00e-49ac-8fce-ebd2c99ec6a5_Feuille_de_soins_PROBST_Annie_20250315_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"1010178384\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ALAFALEQ MUNIRAH\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 20.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"PROBST ANNIE\", \"insured_dob\": \"07-06-1975\", \"beneficiary_name\": \"PROBST ANNIE\", \"beneficiary_dob\": \"07-06-1975\", \"invoice_date\": \"15-03-2025\", \"security_number\": \"275061310302038\", \"invoice_issuer\": \"ALVES NOELLIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 8.00\", \"quantity\": null, \"date_of_service\": \"15-03-2025\", \"mandatory_coverage\": null, \"amount\": 20.08}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d35e4ea0-6678-4dd1-b0d5-fe905d262430_FACTURE_GAFFE_Thierry_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131159840\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GABISSON PIERRE\", \"total_billed\": 108.89, \"bill_paid\": true, \"amount_paid\": 40.0, \"mandatory_coverage\": 48.09, \"complementary_coverage\": 20.6, \"client_part\": 40.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GAFFE Thierry\", \"beneficiary_dob\": \"09-07-1960\", \"invoice_date\": \"05-03-2025\", \"security_number\": \"1600776451031\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 48.09, \"amount\": 108.89}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c8fa8d11-c2df-4f39-9909-28790acc5957_Factures_opthalmo_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10101449014\", \"finess_number\": null, \"doctor_name\": \"Dr Omid DAVIDDI\", \"total_billed\": 65.66, \"bill_paid\": true, \"amount_paid\": 37.0, \"mandatory_coverage\": 28.66, \"complementary_coverage\": null, \"client_part\": 37.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ADJABI Cherif\", \"beneficiary_dob\": \"11-04-1986\", \"invoice_date\": \"16-01-2024\", \"security_number\": \"186045951211869\", \"invoice_issuer\": \"Europole VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGQP00210-BLQP01010\", \"quantity\": null, \"date_of_service\": \"16-01-2024\", \"mandatory_coverage\": 28.66, \"amount\": 65.66}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/846b99ca-4959-4ee2-8d3f-91549b0b27cc_facture_lentilles_10032025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 111.38, \"bill_paid\": true, \"amount_paid\": 111.38, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"marc bouchez\", \"beneficiary_dob\": null, \"invoice_date\": \"04-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Alensa s.r.o.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acuvue Oasys Multifocal (6 lentilles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.7}, {\"description\": \"Acuvue Oasys Multifocal (6 lentilles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 32.7}, {\"description\": \"Biofinity (6 lentilles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 22.99}, {\"description\": \"Biofinity (6 lentilles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 22.99}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10004085394\", \"finess_number\": null, \"doctor_name\": \"CORNU SEVERINE\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 32.4, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 27.6, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CADET Corine\", \"beneficiary_dob\": null, \"invoice_date\": \"07-03-2025\", \"security_number\": 268099741101779, \"invoice_issuer\": \"CORNU SEVERINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGQP002\", \"quantity\": null, \"date_of_service\": \"07-03-2025\", \"mandatory_coverage\": null, \"amount\": 44.95}, {\"description\": \"BBQP001\", \"quantity\": null, \"date_of_service\": \"07-03-2025\", \"mandatory_coverage\": null, \"amount\": 15.05}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751753518\", \"rpps_number\": \"10107999475\", \"finess_number\": null, \"doctor_name\": \"Dr BOUAYED BRAHAM Emna\", \"total_billed\": 97.63, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"QUERCY Kilian\", \"beneficiary_dob\": null, \"invoice_date\": \"13-02-2025\", \"security_number\": null, \"invoice_issuer\": \"BOUAYED BRAHAM Emma\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BJQP002 1\", \"quantity\": null, \"date_of_service\": \"13-02-2025\", \"mandatory_coverage\": null, \"amount\": 65.67}, {\"description\": \"BLQP010 2\", \"quantity\": null, \"date_of_service\": \"13-02-2025\", \"mandatory_coverage\": null, \"amount\": 31.76}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0db7928d-d221-48b1-824d-c8522c83add1_Facture_lentilles__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 140.38, \"bill_paid\": true, \"amount_paid\": 140.38, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Celine Gramond\", \"beneficiary_dob\": null, \"invoice_date\": \"27-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Vision Direct\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OAS1D 90S 8.5 143 VS01, -1.00\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58.49}, {\"description\": \"OAS1D 90S 8.5 143 VS01, -1.25\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58.49}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/edb77e5c-ea68-4bad-a177-1256a3bafc9c_CF_1686176_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/edb77e5c-ea68-4bad-a177-1256a3bafc9c_CF_1686176_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"doctor_name\": null, \"total_billed\": 64.48, \"bill_paid\": true, \"amount_paid\": 64.48, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bruno ZINANI\", \"beneficiary_dob\": null, \"invoice_date\": \"24-02-2025\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE SNC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Day Acuvue Oasys For Astigmatism 30 Corrections OEIL GAUCHE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.94}, {\"description\": \"1 Day Acuvue Oasys For Astigmatism 30 Corrections OEIL DROIT\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.94}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8f0e72ca-64e5-4797-87bc-9378b842433a_CF_1676107_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8f0e72ca-64e5-4797-87bc-9378b842433a_CF_1676107_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"doctor_name\": null, \"total_billed\": 64.48, \"bill_paid\": true, \"amount_paid\": 64.48, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Bruno ZINANI\", \"beneficiary_dob\": null, \"invoice_date\": \"22-01-2025\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE SNC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Day Acuvue Oasys For Astigmatism 30 Corrections OEIL GAUCHE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.94}, {\"description\": \"1 Day Acuvue Oasys For Astigmatism 30 Corrections OEIL DROIT\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.94}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10102031555\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MATEESCU RADU-LUCIAN\", \"total_billed\": 84.5, \"bill_paid\": true, \"amount_paid\": 44.93, \"mandatory_coverage\": 39.57, \"complementary_coverage\": 0, \"client_part\": 44.93, \"remaining_payment\": 0.0, \"insured_name\": \"POULAIN JUSTINE\", \"insured_dob\": \"15-03-1994\", \"beneficiary_name\": \"POULAIN JUSTINE\", \"beneficiary_dob\": \"15-03-1994\", \"invoice_date\": \"07-03-2025\", \"security_number\": \"294036005739358\", \"invoice_issuer\": \"Centre Ophtalmologique Br\\u00e9tigny Essonne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"07-03-2025\", \"mandatory_coverage\": 20.08, \"amount\": 47.22}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"07-03-2025\", \"mandatory_coverage\": 7.01, \"amount\": 16.48}, {\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": \"07-03-2025\", \"mandatory_coverage\": 12.48, \"amount\": 20.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/40e6166e-dce5-4710-aba7-538fa663ec6c_IMG_20250306_093631_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751092081\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PARTOUCHE-ATTIAS JULIE\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": 52.37, \"mandatory_coverage\": 27.63, \"complementary_coverage\": null, \"client_part\": 52.37, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROY TRESSERRAS MANEL\", \"beneficiary_dob\": \"14-02-2018\", \"invoice_date\": \"05-03-2025\", \"security_number\": \"1861194041080\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"2 ATM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 27.63, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/32449664-cea0-482f-8dd1-df73e8882dbf_facture_ophtamo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": 591804844, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"STIEVENARD AYMERIC\", \"total_billed\": 61.61, \"bill_paid\": true, \"amount_paid\": 30, \"mandatory_coverage\": 22.13, \"complementary_coverage\": 9.48, \"client_part\": 30, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PIOTROWIAK Frederic\", \"beneficiary_dob\": \"07-09-1968\", \"invoice_date\": \"03-03-2025\", \"security_number\": \"1680962250017\", \"invoice_issuer\": \"Centre Medical Ophtalmologique Point Vision Lille-Lesquin\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e405dba0-fd0b-48a0-9e3f-576398fa3318_Scan2025-02-28_143544_1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 52}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/51062628-4c2d-444e-9359-ff2acb6104a8_facture_ophtalmologue_1_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10100580926\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MALAK CHEGGOUR\", \"total_billed\": 42, \"bill_paid\": true, \"amount_paid\": 12.6, \"mandatory_coverage\": 29.4, \"complementary_coverage\": 0, \"client_part\": 12.6, \"remaining_payment\": null, \"insured_name\": \"PORET TIFFANY\", \"insured_dob\": \"16-05-2002\", \"beneficiary_name\": \"PORET TIFFANY\", \"beneficiary_dob\": \"16-05-2002\", \"invoice_date\": \"03-03-2025\", \"security_number\": \"202050840927720\", \"invoice_issuer\": \"SCM P\\u00d4LE OISE OPHTALMOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"03-03-2025\", \"mandatory_coverage\": 20.08, \"amount\": 28.68}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"03-03-2025\", \"mandatory_coverage\": 9.32, \"amount\": 13.32}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10100580926\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Dr Malak Cheggour\", \"total_billed\": 72.63, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": 27.63, \"complementary_coverage\": 0, \"client_part\": 45, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PORET TIFFANY\", \"beneficiary_dob\": \"16-05-1997\", \"invoice_date\": \"26-02-2025\", \"security_number\": \"2 02 05 08 409 277 20\", \"invoice_issuer\": \"SCM P\\u00d4LE OISE OPHTALMOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": \"26-02-2025\", \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ffc129de-c82b-4c0b-8ab8-2941fd28cda9_Facture_opticien_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ffc129de-c82b-4c0b-8ab8-2941fd28cda9_Facture_opticien_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10101122454\", \"finess_number\": \"132671298\", \"doctor_name\": \"Dr GUIIGUE BENAYOUN Helena\", \"total_billed\": 525.6, \"bill_paid\": true, \"amount_paid\": 319.48, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 206.03, \"client_part\": 319.48, \"remaining_payment\": null, \"insured_name\": \"MME REGIONI JENNIFER\", \"insured_dob\": \"12-08-1993\", \"beneficiary_name\": \"MME REGIONI JENNIFER\", \"beneficiary_dob\": \"12-08-1993\", \"invoice_date\": \"10-02-2025\", \"security_number\": \"293081315566488\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Luxottica - Dolce & Gabbana - DG5026 -Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 285}, {\"description\": \"BBGR - Nikon SeeMax AP Fv 1.60 TGNS SCBUV -70 Oeil Droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 122.12}, {\"description\": \"BBGR - Nikon SeeMax AP Fv 1.60 TGNS SCBUV -70 Oeil Gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 122.18}, {\"description\": \"OPTICAL PREVOYANCE 2 ANS 1ERE PAIRE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 49}, {\"description\": \"Luxottica - Ray-Ban - RB3548N - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 191}, {\"description\": \"OPTICAL CENTER - Level Junior Sun - LLLEJUS1605 - Autres\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 59}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2dbb4025-f570-49f9-83d1-08aedb2b122d_lunettes_Lou-Ann_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"661002444\", \"rpps_number\": \"10100710325\", \"finess_number\": \"662696269\", \"doctor_name\": \"Dr VRIELYNCK Els \", \"total_billed\": 213.4, \"bill_paid\": true, \"amount_paid\": 58.4, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 154.31, \"client_part\": 59, \"remaining_payment\": null, \"insured_name\": \"LAURENCIE VIRGINE\", \"insured_dob\": \"13-08-1979\", \"beneficiary_name\": \"LAURENCE SALVAT LOU-ANN\", \"beneficiary_dob\": \"23-03-2011\", \"invoice_date\": \"03-12-2024\", \"security_number\": \"279086613614247\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PAC - Level PP - LE24129 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 99}, {\"description\": \"BBGR - Nikon SuperSee AP 1.60 PurBlueUV - HCC+UV - 55/60\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 77}, {\"description\": \"BBGR - Nikon SuperSee AP 1.60 PurBlueUV - HCC+UV - 55/60\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 77}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/795c0773-842a-467d-b78f-07746869b761_IMG_20250228_084908_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"310034384\", \"doctor_name\": \"NEAGU Elena-Mihaela\", \"total_billed\": 36.92, \"bill_paid\": true, \"amount_paid\": 11.08, \"mandatory_coverage\": 25.84, \"complementary_coverage\": 0, \"client_part\": 11.08, \"remaining_payment\": 0, \"insured_name\": \"MINOVES DELPHINE\", \"insured_dob\": \"26-06-1978\", \"beneficiary_name\": \"MINOVES MATHYS\", \"beneficiary_dob\": \"02-09-2010\", \"invoice_date\": \"29-01-2025\", \"security_number\": \"278060916004411\", \"invoice_issuer\": \"CDS OPHTALMOLOGlQUE AFSOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGQP009 ADI\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": 19.12, \"amount\": 27.32}, {\"description\": \"BBQP001 ATM\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": 6.72, \"amount\": 9.6}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e6272522-cc2a-44b0-af14-9f54d1a76a59_Facture_lunettes__2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e6272522-cc2a-44b0-af14-9f54d1a76a59_Facture_lunettes__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"711006544\", \"rpps_number\": null, \"finess_number\": \"712603273\", \"doctor_name\": null, \"total_billed\": 691.62, \"bill_paid\": true, \"amount_paid\": 431.56, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 259.97, \"client_part\": 431.56, \"remaining_payment\": null, \"insured_name\": \"ZENDJABIL INES\", \"insured_dob\": \"06-03-1999\", \"beneficiary_name\": \"ZENDJABIL INES\", \"beneficiary_dob\": \"06-03-1999\", \"invoice_date\": \"08-10-2024\", \"security_number\": \"299037147522941\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Kering Eyewear - Gucci - GG09860 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 460}, {\"description\": \"BBGR - Nikon SuperSee 1.50 PurBlueUV\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 100.31}, {\"description\": \"BBGR - Nikon SuperSee 1.50 PurBlueUV\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 100.31}, {\"description\": \"GARANTIE 2 ANS 1ERE PAIRE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 49.0}, {\"description\": \"Special Eyes Distribution - Filium - FI FI1912 -Perc\\u00e9e\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 289}, {\"description\": \"BBGR - Pack Conf LYRIS AS 1.6 Fab ARAST - 55 \", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 103}, {\"description\": \"BBGR - Pack Conf LYRIS AS 1.6 Fab ARAST - 55 \", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 103}, {\"description\": \"OPTICAL PREVOYANCE 2 ANS 2EME PAIRE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 49}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10003251237\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MURA FREDERIC\", \"total_billed\": 57.63, \"bill_paid\": true, \"amount_paid\": 30, \"mandatory_coverage\": 27.63, \"complementary_coverage\": 0, \"client_part\": 30, \"remaining_payment\": 0.0, \"insured_name\": \"MERADI IDIR\", \"insured_dob\": \"09-03-1997\", \"beneficiary_name\": \"MERADI IDIR\", \"beneficiary_dob\": \"09-03-1997\", \"invoice_date\": \"25-02-2025\", \"security_number\": \"197039935281911\", \"invoice_issuer\": \"MONTPELLIER CENTRE OPHTALMOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"25-02-2025\", \"mandatory_coverage\": 18.64, \"amount\": 38.88}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"25-02-2025\", \"mandatory_coverage\": 8.99, \"amount\": 18.75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fcc5ea07-4d36-4d6d-9a51-d87107b14a00_Facture_0296202502750200566_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"JEAN-Fran\\u00e7ois DOUET\", \"total_billed\": 322.0, \"bill_paid\": true, \"amount_paid\": 117.85, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 204.06, \"client_part\": 117.85, \"remaining_payment\": null, \"insured_name\": \"MARCHAND GERALDINE\", \"insured_dob\": null, \"beneficiary_name\": \"MASSONNAT NILS\", \"beneficiary_dob\": null, \"invoice_date\": \"24-02-2025\", \"security_number\": \"2 79 11 75 112 154\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique FRESNES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Morture optique Carat\\u00e9 - NIKE 550 029 BLACKVOLT\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 112.0}, {\"description\": \"Verre D - BBGR Unor 1.60 NResist+UV \\u00d866-/BBG\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 105.0}, {\"description\": \"Verre G - BBGR Unor 1.60 NResist+UV \\u00d862-/BBG\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 105.0}, {\"description\": \"Pack brise PREMIUM\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/455854fe-84e6-4ece-9492-276410b4e422_BORDERES1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"31100391175\", \"rpps_number\": \"1010043580\", \"finess_number\": null, \"doctor_name\": \"Alexandre CAUSSE\", \"total_billed\": 141.68, \"bill_paid\": true, \"amount_paid\": 85.18, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BORDERES Frederic\", \"beneficiary_dob\": null, \"invoice_date\": \"11-02-2025\", \"security_number\": \"14909315500289\", \"invoice_issuer\": \"POINT VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 141.68}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/319549b4-5b59-4bcf-97de-4067c938378a_Facture_ophtalmo_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10003944641\", \"finess_number\": \"421805318\", \"doctor_name\": \"COHEN Gilbert\", \"total_billed\": 44.0, \"bill_paid\": true, \"amount_paid\": 44.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NOVELLA Pauline\", \"beneficiary_dob\": null, \"invoice_date\": \"20-02-2025\", \"security_number\": \"204014315708668\", \"invoice_issuer\": \"Cabinet de l'Horme\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY8\", \"quantity\": 1, \"date_of_service\": \"20-02-2025\", \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Hors Nomenclature\", \"quantity\": 1, \"date_of_service\": \"20-02-2025\", \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/11895e77-e913-4eee-bdc2-ecf12d7d73a8_480502092_2183001078819527_8365903175877951864_n_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10002317609\", \"finess_number\": null, \"doctor_name\": \"DR IETAN JACQUES\", \"total_billed\": 26.44, \"bill_paid\": true, \"amount_paid\": 7.93, \"mandatory_coverage\": 18.51, \"complementary_coverage\": null, \"client_part\": 7.93, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ERRIERA EMILIE\", \"beneficiary_dob\": \"22-01-2009\", \"invoice_date\": \"21-02-2025\", \"security_number\": \"276109933307093\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": \"21-02-2025\", \"mandatory_coverage\": null, \"amount\": 18.51}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/70546a38-b6d7-4151-b08a-82b01c8e4a98_480755849_2183001178819517_1003204107786000285_n_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"971744834\", \"rpps_number\": \"10002317609\", \"finess_number\": null, \"doctor_name\": \"DETAN JACQUES\", \"total_billed\": 26.44, \"bill_paid\": true, \"amount_paid\": 7.93, \"mandatory_coverage\": 18.51, \"complementary_coverage\": null, \"client_part\": 7.93, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ERRERA NOROISOA\", \"beneficiary_dob\": \"11-10-1976\", \"invoice_date\": \"21-02-2025\", \"security_number\": \"276109933307093\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Examen de la vision binoculaire\", \"quantity\": null, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 26.44}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/06e1588f-71a7-43fc-bf99-d066db847189_Opticien__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"691718175\", \"rpps_number\": \"10102087805\", \"finess_number\": \"692679723\", \"doctor_name\": \"CHAOUD JARROCH Hala\", \"total_billed\": 375.6, \"bill_paid\": true, \"amount_paid\": 321.45, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 54.06, \"client_part\": 321.45, \"remaining_payment\": 0.0, \"insured_name\": \"VALARCHE RYAN\", \"insured_dob\": \"23-01-2001\", \"beneficiary_name\": \"VALARCHE RYAN\", \"beneficiary_dob\": \"23-01-2001\", \"invoice_date\": \"17-02-2025\", \"security_number\": \"101011310310854\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ODLM - Faconnable - FR005 - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 238.0}, {\"description\": \"Zeiss - ZeISS UF Stock ClearView 15 UVProtect BP - 70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 73.8}, {\"description\": \"Zeiss - ZeISS UF Stock ClearView 15 UVProtect BP - 70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 73.8}, {\"description\": \"Garantie 3 ans - OPTICAL PREVOYANCE 1ERE PAIRE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 59.0}, {\"description\": \"Special Eyes Distribution - Lukkas Sun - LKLUS1906 - Traditionne\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 169.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e1f9c9d0-a523-4242-b3f4-4f8a10391780_Facture_du_140225_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 10.65, \"bill_paid\": true, \"amount_paid\": 3.72, \"mandatory_coverage\": 6.93, \"complementary_coverage\": 0.0, \"client_part\": 3.72, \"remaining_payment\": 0.0, \"insured_name\": \"MINETTE DANY\", \"insured_dob\": \"02-07-1966\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"14-02-2025\", \"security_number\": \"266077511202105\", \"invoice_issuer\": \"PHARMACIE PASTEUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HB7\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 9.84}, {\"description\": \"HOR\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.81}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c21b71c3-edeb-458b-b9d5-6977d39092d2_Screenshot_20250215_191517_cn_wps_moffice_eng_PDFReader1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"JEAN-MICHEL MURATET\", \"total_billed\": 122.8, \"bill_paid\": true, \"amount_paid\": 92.8, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 30, \"client_part\": 92.8, \"remaining_payment\": null, \"insured_name\": \"GOMOLLA-DANY\", \"insured_dob\": null, \"beneficiary_name\": \"GOMOLLA-LEGRAND Camille\", \"beneficiary_dob\": null, \"invoice_date\": \"15-02-2025\", \"security_number\": \"1740862119224\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique PAMIERS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - Biofinity Toric (6) Dur\\u00e9e de vie:Mensuelle (PU:42,40\\u20ac)SoupleCoopervisionBoite De 6 Lentilles (TVA 20,00\\u20ac)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 42.4}, {\"description\": \"Lentille G - Biofinity Toric (6) Dur\\u00e9e de vie:Mensuelle (PU:42,40\\u20ac)SoupleCoopervisionBoite De 6 Lentilles (TVA 20,00\\u20ac)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 42.4}, {\"description\": \"CARTE FIDELITE (TVA 20,00\\u20ac)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 10.0}, {\"description\": \"Regard-Pack 3X355 MIHOR (TVA 20,00\\u20ac)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 28.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"841703887\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GAVEN ROGNON ISABELLE\", \"total_billed\": 78.43, \"bill_paid\": true, \"amount_paid\": 46.82, \"mandatory_coverage\": 22.13, \"complementary_coverage\": 9.48, \"client_part\": 46.82, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TRINCHILLO Ludovic\", \"beneficiary_dob\": \"27-09-1976\", \"invoice_date\": \"13-02-2025\", \"security_number\": \"1760913054041\", \"invoice_issuer\": \"Centre M\\u00e9dical Ophtalmologique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0a639214-b39e-4971-8d51-d83b7d2d2c2e_facture_lentilles_17_01_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SELIM RENAMEUR\", \"total_billed\": 540.8, \"bill_paid\": true, \"amount_paid\": 380.8, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 160.0, \"client_part\": 380.8, \"remaining_payment\": null, \"insured_name\": \"MAITRET Gilbert\", \"insured_dob\": null, \"beneficiary_name\": \"MAITRET Gilbert\", \"beneficiary_dob\": null, \"invoice_date\": \"17-01-2025\", \"security_number\": \"1540866137003\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique PERPIGNAN POLYGONE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Lentille D - Biofinity Multifocal Toric N (6) Dur\\u00e9e de vie Mensuelle (PU:135,20\\u20ac) Souple/Coopervision/Boite De 6 Lentilles (TVA 20.0\\u20ac)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 270.4}, {\"description\": \"Lentille G - Biofinity Multifocal Toric N (6) Dur\\u00e9e de vie Mensuelle (PU:135,20\\u20ac) Souple/Coopervision/Boite De 6 Lentilles \", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 270.4}, {\"description\": \"CARTE FIDELITE (TVA 20.0\\u20ac)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 10.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7957eec3-5b84-4e8d-bde3-99aa88ceef07_Facture_opticien_Mme_HERMANN_CLAUDETTE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ALLIEU Sandrine\", \"total_billed\": 253.6, \"bill_paid\": null, \"amount_paid\": 29.55, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 223.96, \"client_part\": 29.55, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Claudette HERMANN\", \"beneficiary_dob\": null, \"invoice_date\": \"22-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Krys LATTES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Luxtra-80 Route des Luci\\u00e8res - FR 0540 Valbonne Ray Ban Vista 57006 577 HAVANA 1217-377 8503673425245 Plante\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 113.6}, {\"description\": \"Optiswiss France Sarl\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 70.0}, {\"description\": \"Optibles DUAL-UV super antifatigue Premium antifatigue anti-irritation anti-scurfies\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Optiswiss France Sarl\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 70.0}, {\"description\": \"Optibles DUAL-UV super antireflet\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/44e87f9e-a4e2-4851-b383-e1401b36e853_image_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"131025397\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MAHFUD ALI ABDURRAOF\", \"total_billed\": 118.14, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": 40.7, \"complementary_coverage\": 17.44, \"client_part\": 60.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DESCHAMPS Laurent\", \"beneficiary_dob\": \"14-06-1968\", \"invoice_date\": \"03-02-2025\", \"security_number\": \"1680606088153\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9838138f-ae12-4664-831e-ae5f0b9c14fb_IMG_5959_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10100538304\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MASCALI REMI\", \"total_billed\": 72.63, \"bill_paid\": true, \"amount_paid\": 45, \"mandatory_coverage\": 27.63, \"complementary_coverage\": 0.0, \"client_part\": 45, \"remaining_payment\": null, \"insured_name\": \"DIOP YEIYA\", \"insured_dob\": \"05-12-1990\", \"beneficiary_name\": \"DIOP YEIYA\", \"beneficiary_dob\": \"05-12-1990\", \"invoice_date\": \"06-02-2025\", \"security_number\": \"290126822406460\", \"invoice_issuer\": \"SELARL DES DRs DARUGAR ET MASCALI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": 18.64, \"amount\": 49.0}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"20250206\", \"mandatory_coverage\": 8.99, \"amount\": 23.63}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/879cf76a-68c1-40d7-b444-0ff3341c97de_facture-zami-isabelle-20250130-1703_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Chirurgie dentaire\", \"adeli_number\": null, \"rpps_number\": \"10101300787\", \"finess_number\": null, \"doctor_name\": \"Lisa SOTO\", \"total_billed\": 3000.0, \"bill_paid\": true, \"amount_paid\": 3000.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"ZAMI Isabelle\", \"insured_dob\": \"11-08-1994\", \"beneficiary_name\": \"ZAMI Isabelle\", \"beneficiary_dob\": \"11-08-1994\", \"invoice_date\": \"30-01-2025\", \"security_number\": \"23408972220123\", \"invoice_issuer\": \"Docteur Lisa SOTO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Proth\\u00e8se amovible d\\u00e9finitive r\\u00e9sine\", \"quantity\": 5, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 2300.0}, {\"description\": \"Rebasage de Proth\\u00e8se amovible compl\\u00e8te\", \"quantity\": 9, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 350.0}, {\"description\": \"Rebasage de Proth\\u00e8se amovible compl\\u00e8te\", \"quantity\": 9, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 350.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"940026628\", \"doctor_name\": null, \"total_billed\": 252, \"bill_paid\": true, \"amount_paid\": 172.2, \"mandatory_coverage\": 79.8, \"complementary_coverage\": 172.2, \"client_part\": 0.0, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rafika Aubeneau\", \"beneficiary_dob\": null, \"invoice_date\": \"05-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Centre Ophtalmologique Cr\\u00e9teil Soleil\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Monture LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30}, {\"description\": \"Oeil Droit LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 16.2, \"amount\": 90}, {\"description\": \"Oeil Gauche LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 16.2, \"amount\": 90}, {\"description\": \"Suppl\\u00e9ment\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42.0, \"amount\": 42.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c2009b55-58d4-411f-a25a-3dc1e32e4a89_facture_lentille_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 96.9, \"bill_paid\": true, \"amount_paid\": 96.9, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Romain BESNARD\", \"beneficiary_dob\": null, \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity (6) (Lentilles sph\\u00e9riques) Oeil droit\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.0}, {\"description\": \"Biofinity (6) (Lentilles sph\\u00e9riques) Oeil gauche\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.0}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -3.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b6f16c2c-bd49-4062-b2ee-486cef3a62a7_Facture_Mr_Boulahia_Mohammed_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"942631995\", \"doctor_name\": null, \"total_billed\": 401.2, \"bill_paid\": true, \"amount_paid\": 256.15, \"mandatory_coverage\": 15.33, \"complementary_coverage\": 129.72, \"client_part\": 256.15, \"remaining_payment\": 0, \"insured_name\": \"M. BOULAHIA MOHAMED\", \"insured_dob\": \"19-08-1929\", \"beneficiary_name\": \"M. BOULAHIA MOHAMED\", \"beneficiary_dob\": \"19-08-1929\", \"invoice_date\": \"27-12-2024\", \"security_number\": \"129089340312533\", \"invoice_issuer\": \"Optic 2000 Thiais\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Reflect - REFLECT - EAMES - Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 116.0}, {\"description\": \"BBGR - ClassA Unif As 1.67 AR - 65\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 7.65, \"amount\": 42.5}, {\"description\": \"BBGR - ClassA Unif As 1.67 AR - 65\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 7.65, \"amount\": 42.5}, {\"description\": \"CARTE BLANCHE - NAF NAF - MURIELLE - Traditionnel\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 95.2}, {\"description\": \"BBGR - ClassA Unif As 1.67 AR - 65\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 52.5}, {\"description\": \"BBGR - ClassA Unif As 1.67 AR - 65\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 52.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a63e27cd-9d32-4806-9cb9-e3bc43a3c999_image03-02-2025-093926_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 149.1, \"bill_paid\": true, \"amount_paid\": 149.1, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BARSAC MAURIN Marie Ange\", \"beneficiary_dob\": null, \"invoice_date\": \"06-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Optik Kaufman SA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACCOBMAX13M/L38PK-DIA14.3-BO8.4-PWR2-ADDH\", \"quantity\": 3, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 149.1}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7b830a57-2131-479c-bf31-1673e7308903_invoice_2025_1_31_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7b830a57-2131-479c-bf31-1673e7308903_invoice_2025_1_31_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 75.95, \"bill_paid\": true, \"amount_paid\": 75.95, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nicolas Garrido\", \"beneficiary_dob\": null, \"invoice_date\": \"07-11-2024\", \"security_number\": null, \"invoice_issuer\": \"Vision Direct\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Everclear Eye Drops Emea\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 14.99}, {\"description\": \"BIOFINITY 6P, -1.00\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 14.99}, {\"description\": \"BIOFINITY 6P, -1.00\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 14.99}, {\"description\": \"BIOFINITY 6P, -1.00\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 14.99}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Otique\", \"adeli_number\": \"061754057\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LANTERI SARA\", \"total_billed\": 74.61, \"bill_paid\": true, \"amount_paid\": 43.0, \"mandatory_coverage\": 22.13, \"complementary_coverage\": 9.48, \"client_part\": 43.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"COURANT Nils\", \"beneficiary_dob\": \"07-04-2006\", \"invoice_date\": \"27-01-2025\", \"security_number\": \"1740337261299\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"781712559\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MESSAOUDI NABIL\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 8.32, \"mandatory_coverage\": 12.48, \"complementary_coverage\": null, \"client_part\": 8.32, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BEAUPERIN Yann\", \"beneficiary_dob\": \"11-10-1974\", \"invoice_date\": \"27-01-2025\", \"security_number\": \"1741056260087\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 12.48, \"amount\": 20.86}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ad0eb300-fe6a-4f80-ad2a-1c020036f9a0_IMG_20250127_0001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"937890451\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 35.0, \"bill_paid\": true, \"amount_paid\": 35.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SPILMONT GERARD\", \"beneficiary_dob\": \"22-03-1952\", \"invoice_date\": null, \"security_number\": \"152035924909826\", \"invoice_issuer\": \"OPTICAL CENTER ANTIBES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"RJT\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 1.5, \"amount\": null}, {\"description\": \"PL\", \"quantity\": 10, \"date_of_service\": null, \"mandatory_coverage\": 1.5, \"amount\": 35.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"132664467\", \"doctor_name\": \"Lamdaoui\", \"total_billed\": 167, \"bill_paid\": true, \"amount_paid\": 98.3, \"mandatory_coverage\": 68.7, \"complementary_coverage\": 98.3, \"client_part\": 0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Abdelkader Behar\", \"beneficiary_dob\": null, \"invoice_date\": \"08-01-2025\", \"security_number\": \"187065960604422\", \"invoice_issuer\": \"Lunettes Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Monture LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30}, {\"description\": \"Oeil Droit LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 7.65, \"amount\": 42.5}, {\"description\": \"Oeil Gauche LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"Unifocal - Digital Protect Pro 1.6 - Organique (MR8) Verre transparent, anti-rayures, anti-salissures, anti-reflets, hydrophobe, anti-lumi\\u00e8re bleue 99%\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 7.65}, {\"description\": \"Suppl\\u00e9ment\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.9, \"amount\": 5.0}, {\"description\": \"Suppl\\u00e9ment\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.0, \"amount\": 10.0}, {\"description\": \"Suppl\\u00e9ment\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42, \"amount\": 42}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f19b0290-ea44-45ae-9c34-719083ec4671_Lentilles_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"750062093\", \"rpps_number\": \"10101500956\", \"finess_number\": \"662699604\", \"doctor_name\": \"CORTES Laura\", \"total_billed\": 272.0, \"bill_paid\": true, \"amount_paid\": 36.5, \"mandatory_coverage\": null, \"complementary_coverage\": 272.0, \"client_part\": 36.5, \"remaining_payment\": null, \"insured_name\": \"M. SABINEU VIVIEN\", \"insured_dob\": \"16-11-2001\", \"beneficiary_name\": \"M. SABINEU VIVIEN\", \"beneficiary_dob\": \"16-11-2001\", \"invoice_date\": \"21-01-2025\", \"security_number\": \"101116613629432\", \"invoice_issuer\": \"ALAIN AFFLELOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et votre suivi\", \"quantity\": 4, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 136.0}, {\"description\": \"Johnson & Johnson - Accuvue\\u00ae Oasys\\u00ae for Astigmatism 6L - 14.5/8.6 (Bihedeobdomatrides) - BOITE DE 6\", \"quantity\": 4, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 136.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f598acd4-d881-4994-8f92-4d953194beab_facture_salomon_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f598acd4-d881-4994-8f92-4d953194beab_facture_salomon_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"751978883\", \"rpps_number\": \"10100139665\", \"finess_number\": \"752602805\", \"doctor_name\": \"Dr VAN WENT Charles\", \"total_billed\": 210.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": 160, \"client_part\": 50.0, \"remaining_payment\": null, \"insured_name\": \"SALOMON SYLVIE\", \"insured_dob\": \"28-01-1977\", \"beneficiary_name\": \"SALOMON SYLVIE\", \"beneficiary_dob\": \"28-01-1977\", \"invoice_date\": \"26-10-2024\", \"security_number\": \"277010217304668\", \"invoice_issuer\": \"SAS CLAIROPTIC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Conseils de manipulation pour la pose, le retrait et l'entretien des lentilles de contact et de votre suivi\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 105.0}, {\"description\": \"Alcon - DAILIES TOTAL 1 90L - 14.1/8.5 (Journali\\u00e8res) - BOITE DE 90\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 105.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10102056388\", \"finess_number\": null, \"doctor_name\": \"Anna-Maria SEUTHE\", \"total_billed\": 119.48, \"bill_paid\": true, \"amount_paid\": 76.51, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NETZER JEAN-MARIE\", \"beneficiary_dob\": null, \"invoice_date\": \"02-01-2025\", \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4f09f855-c993-42ea-97b1-3dd728b53e16_devis_ophtalmo_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [{\"description\": null, \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9eaebd69-232f-40b6-9bb2-646f59308015_Adobe_Scan_16_janv._2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"250021276\", \"doctor_name\": null, \"total_billed\": 68.69, \"bill_paid\": true, \"amount_paid\": 68.69, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DIMANCHE Nathalie\", \"insured_dob\": \"15-09-1968\", \"beneficiary_name\": \"DIMANCHE Nathalie\", \"beneficiary_dob\": \"15-09-1968\", \"invoice_date\": \"15-01-2025\", \"security_number\": \"268092505616908\", \"invoice_issuer\": \"CDS CITY SANTHE BESANCON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OCT+PACHYMETRIE ATM\", \"quantity\": 1.0, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 57.68}, {\"description\": \"OCT+PACHYMETRIE ATM\", \"quantity\": 1.0, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 11.01}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b98b1381-e3b2-41b9-87c8-a75b0a7d5e15_ReleveMensuelDecembre2024_CPAM_optique_Auregane_Daden_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fe86f6a8-8360-458e-834c-805ac99350cf_Monsieur_OUEDRAOGO_DAOUDA_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": \"971456983\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"OUEDRAOGO DAOUDA\", \"total_billed\": 87.0, \"bill_paid\": true, \"amount_paid\": 26.1, \"mandatory_coverage\": 60.9, \"complementary_coverage\": null, \"client_part\": 26.1, \"remaining_payment\": null, \"insured_name\": \"RAGOU CASSANDRA\", \"insured_dob\": \"07-02-2004\", \"beneficiary_name\": \"RAGOU CASSANDRA\", \"beneficiary_dob\": \"07-02-2004\", \"invoice_date\": \"08-01-2025\", \"security_number\": \"2040297416897\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"APC 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"MCU 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6ba7415c-4d85-465b-bc52-d60959b9457f_Facture_NCBL_Cataracte_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6ba7415c-4d85-465b-bc52-d60959b9457f_Facture_NCBL_Cataracte_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Soins hospitalier\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"33078040\", \"doctor_name\": null, \"total_billed\": 1543.06, \"bill_paid\": true, \"amount_paid\": 1543.06, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BONNET GERARD\", \"beneficiary_dob\": \"09-09-1950\", \"invoice_date\": \"09-01-2025\", \"security_number\": \"1500999351398\", \"invoice_issuer\": \"CLINIQUE BEL AIR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"GHS PAS\", \"quantity\": 1, \"date_of_service\": \"07-01-2025\", \"mandatory_coverage\": 786.36, \"amount\": 810.36}, {\"description\": null, \"quantity\": 1, \"date_of_service\": \"07-01-2025\", \"mandatory_coverage\": 382.7, \"amount\": 732.7}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c4930995-bbbe-45dd-9afe-800005fb5890_Facture_lunettes__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10008767310\", \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 210, \"bill_paid\": true, \"amount_paid\": 210, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lena GRZESKIEWICZ\", \"beneficiary_dob\": \"24-12-2003\", \"invoice_date\": \"09-01-2025\", \"security_number\": \"203126938421128\", \"invoice_issuer\": \"JIMMY FAIRLY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Monture\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.05, \"amount\": 135.0}, {\"description\": \"Verre droit\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 11.25, \"amount\": 37.5}, {\"description\": \"Verre gauche\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 11.25, \"amount\": 37.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1498c5f5-22cf-41d4-87f0-9b295ff481f4_IMG_5659_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DOCTEUR GHASI IMAD\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Wion Jeremy\", \"beneficiary_dob\": \"29-12-1985\", \"invoice_date\": \"06-01-2025\", \"security_number\": \"185125960622337\", \"invoice_issuer\": \"DOCTEUR GHASI IMAD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGQP002\", \"quantity\": 1, \"date_of_service\": \"06-01-2025\", \"mandatory_coverage\": null, \"amount\": 28.29}, {\"description\": \"BJQP002\", \"quantity\": 1, \"date_of_service\": \"06-01-2025\", \"mandatory_coverage\": null, \"amount\": 13.12}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6589f4fa-ebac-459a-9ebe-aa84820a2905_FACTURE_LENTILLE_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752615948\", \"doctor_name\": null, \"total_billed\": 159.0, \"bill_paid\": true, \"amount_paid\": 159.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JULIEN CHARPENTIER\", \"beneficiary_dob\": \"12-01-1988\", \"invoice_date\": \"19-12-2024\", \"security_number\": \"188017831101514\", \"invoice_issuer\": \"Optical Center\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ALCON OPTI-FREE PURE MOIST Pack 2x300ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 19.0}, {\"description\": \"JOHNSON & JOHNSON ACUVUE OASYS WITH HYDRACLEAR PLUS (12)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"JOHNSON & JOHNSON ACUVUE OASYS WITH HYDRACLEAR PLUS (12)\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/179673c3-7b80-47cb-9467-3425e010905a_ordonnance08.10.2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": \"501012033\", \"rpps_number\": \"10102252987\", \"finess_number\": null, \"doctor_name\": \"Dr A. BOUMANI\", \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [{\"description\": \"Une paire de lunettes avec monture Simple foyer Vision de loin\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Une paire de lunettes avec monture Simple foyer Vision de pr\\u00e8s\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e0b52e15-e34c-41a0-9abe-12afcf235ce3_Moris_Fabrice_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"932657430\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 454.38, \"bill_paid\": true, \"amount_paid\": 114.29, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 340.0, \"client_part\": 114.29, \"remaining_payment\": null, \"insured_name\": \"MORIS FABRICE\", \"insured_dob\": null, \"beneficiary_name\": \"MORIS FABRICE\", \"beneficiary_dob\": null, \"invoice_date\": \"09-12-2024\", \"security_number\": \"1741193029081\", \"invoice_issuer\": \"GrandOptical ROSNY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Monture optique Carrel - OAKLEY OX8177 817702 SATIN GREY\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 143.2}, {\"description\": \"Verre D - Eyzen Start Fit Ormis TRS GNS Cz Rock (066-ESS)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 155.59}, {\"description\": \"Verre GNS (TVA 20.0%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}, {\"description\": \"Verre G - Eyzen Start Fit Ormis TRS GNS Cz Rock (066-ESS)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 155.59}, {\"description\": \"Verre GNS (TVA 20.0%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}, {\"description\": \"Contrat Partenaire Adulte\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/abce009b-4202-4f31-988c-e537ecb24b81_Facture_Larraz_Monique_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Audiopathe\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"342610441\", \"doctor_name\": \"Quentin Arazo\", \"total_billed\": 1900.0, \"bill_paid\": true, \"amount_paid\": 1420.0, \"mandatory_coverage\": 480.0, \"complementary_coverage\": 0.0, \"client_part\": 1420.0, \"remaining_payment\": 0.0, \"insured_name\": \"LARRAZ MONIQUE\", \"insured_dob\": \"25-03-1937\", \"beneficiary_name\": \"LARRAZ MONIQUE\", \"beneficiary_dob\": \"25-03-1937\", \"invoice_date\": \"03-01-2025\", \"security_number\": \"143011225300231\", \"invoice_issuer\": \"Amplifon Montpellier Pr\\u00e9s d'Aren\\u00e8s\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PURE 312 3X GRANITE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 950.0}, {\"description\": \"PURE 312 3X GRANITE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 950.0}, {\"description\": \"EC - SI 2.0 M 2L\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}, {\"description\": \"EC - SI 2.0 M 2R\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b03ad336-dc01-44e1-9861-8a529da0c485_MGP_MUTUELLE_0001_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b03ad336-dc01-44e1-9861-8a529da0c485_MGP_MUTUELLE_0001_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 168.0, \"bill_paid\": true, \"amount_paid\": 168.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 168.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VALIENTE Andr\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"2023-08-02\", \"security_number\": \"142079935016292\", \"invoice_issuer\": \"GEANT OPTICAL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"gentle80 toric multifocale diam 14.5 ry 8\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 84.0}, {\"description\": \"gentle80 toric multifocale diam 14.5 ry 8\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 84.0}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/29119db0-278e-4e3e-9e31-e9f5a79719a4_FACTURE_OPTIQUE_BERTHELOT_SERENA_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"452642481\", \"doctor_name\": null, \"total_billed\": 378.06, \"bill_paid\": true, \"amount_paid\": 292.94, \"mandatory_coverage\": 0.06, \"complementary_coverage\": 50.0, \"client_part\": 292.94, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BERTHELOT Serena\", \"beneficiary_dob\": null, \"invoice_date\": \"24-12-2024\", \"security_number\": null, \"invoice_issuer\": \"SARL ATOL OGEL LES OPTICIENS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"HOY Myosmar 1.58 Uv 0.5mm diam 60, ind 1.5, Polycarbonate, Unifocal\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 171.5}, {\"description\": \"HOY Myosmar 1.58 Uv 0.5mm diam 60, ind 1.5, Polycarbonate, Unifocal\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 171.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"061144382\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BUTORI PAULINE\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 73.13, \"mandatory_coverage\": 46.87, \"complementary_coverage\": null, \"client_part\": 73.13, \"remaining_payment\": 0, \"insured_name\": \"CHAMPOUSSIN SYLVIANE\", \"insured_dob\": \"18-03-1964\", \"beneficiary_name\": \"CHAMPOUSSIN SYLVIANE\", \"beneficiary_dob\": \"18-03-1964\", \"invoice_date\": \"28-11-2024\", \"security_number\": \"2640383137231\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"ATM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"069203487\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DASSOT SANDRINE\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 8.32, \"mandatory_coverage\": 12.48, \"complementary_coverage\": null, \"client_part\": 8.32, \"remaining_payment\": 0, \"insured_name\": \"CHAMPOUSSIN SYLVIANE\", \"insured_dob\": \"18-03-1964\", \"beneficiary_name\": \"CHAMPOUSSIN SYLVIANE\", \"beneficiary_dob\": \"18-03-1964\", \"invoice_date\": \"28-04-2024\", \"security_number\": \"2640383137231\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eba2fed8-d29e-46a3-8e4b-02e46cabe92d_Numerisation_20241220_2_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 40.8, \"bill_paid\": true, \"amount_paid\": 40.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Isabelle BONGOAT\", \"beneficiary_dob\": null, \"invoice_date\": \"17-10-2024\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Day Acuvue Moist Multifocal (30) (Sph\\u00e9rique , Multifocale) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 34.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/085f8bfc-690a-4daf-9559-865274181242_Facture_CIANO_Ophtamologique_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10002135241\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SALSOU ERIC\", \"total_billed\": 67.23, \"bill_paid\": true, \"amount_paid\": 40.0, \"mandatory_coverage\": 27.23, \"complementary_coverage\": 0, \"client_part\": 40, \"remaining_payment\": null, \"insured_name\": \"CIANO OLIVIER\", \"insured_dob\": \"08-12-1976\", \"beneficiary_name\": \"CIANO OLIVIER\", \"beneficiary_dob\": \"08-12-1976\", \"invoice_date\": \"28-11-2024\", \"security_number\": \"176129202306467\", \"invoice_issuer\": \"POLE OPHTALMOLOGlQUE SAINTE LUCIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": 1, \"date_of_service\": \"28-11-2024\", \"mandatory_coverage\": 18.37, \"amount\": 45.35}, {\"description\": \"ATM\", \"quantity\": 1, \"date_of_service\": \"28-11-2024\", \"mandatory_coverage\": null, \"amount\": 21.88}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b391511b-3512-45ab-bedc-ebb2cf8a78e9_7819851_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 30.8, \"bill_paid\": true, \"amount_paid\": 30.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Charlene BRUSSET\", \"beneficiary_dob\": null, \"invoice_date\": \"14-12-2024\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PureVision 2 (6) (Sph\\u00e9rique)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 24.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/57c0b77e-4634-4a68-b116-73ceb3223a39_OPHTALMO_16_12_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"061756193\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHAROKI DIANE\", \"total_billed\": 103.43, \"bill_paid\": true, \"amount_paid\": 41.4, \"mandatory_coverage\": 62.03, \"complementary_coverage\": null, \"client_part\": 41.4, \"remaining_payment\": 0, \"insured_name\": \"BERTHELEMY Michel\", \"insured_dob\": \"16-11-1939\", \"beneficiary_name\": \"BERTHELEMY Michel\", \"beneficiary_dob\": \"16-11-1939\", \"invoice_date\": \"16-12-2024\", \"security_number\": \"139119352838\", \"invoice_issuer\": \"Madame CHAROKI DIANE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 62.03, \"amount\": 103.43}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e389253a-a9f3-4559-b704-61be921eb0d3_Facture_lentilles_decembre_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 29.5, \"bill_paid\": true, \"amount_paid\": 29.5, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 29.5, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie GUERMONT\", \"beneficiary_dob\": null, \"invoice_date\": \"10-12-2024\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Plus Hydraglyde (6) (Sph\\u00e9rique) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.9}, {\"description\": \"Air Optix Plus Hydraglyde (6) (Sph\\u00e9rique) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10100951200\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GOGNEAUX LUCILE\", \"total_billed\": 59.9, \"bill_paid\": true, \"amount_paid\": 21.0, \"mandatory_coverage\": 27.23, \"complementary_coverage\": 11.67, \"client_part\": 21, \"remaining_payment\": null, \"insured_name\": \"LAURIDANT CHARLOTTE\", \"insured_dob\": \"23-01-1993\", \"beneficiary_name\": \"LAURIDANT CHARLOTTE\", \"beneficiary_dob\": \"23-01-1993\", \"invoice_date\": \"05-12-2024\", \"security_number\": \"293015950827279\", \"invoice_issuer\": \"CABINET D'OPHTALMOLOGIE DES FLANDRES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"05-12-2024\", \"mandatory_coverage\": 18.37, \"amount\": 40.41}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"05-12-2024\", \"mandatory_coverage\": 8.86, \"amount\": 19.49}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/605bfa50-15ce-48a0-87ec-201c3aa0557a_lunette_auregane_daden_001_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"292603248\", \"doctor_name\": \"LUNEDOU\", \"total_billed\": 627.9, \"bill_paid\": true, \"amount_paid\": 307.61, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 250, \"client_part\": 307.61, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DADEN Auregane\", \"beneficiary_dob\": null, \"invoice_date\": \"07-12-2024\", \"security_number\": null, \"invoice_issuer\": \"CENTRE COMMERCIAL LA GALERIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATO AMEYA - AMEYA ADV920L - 54/18 - Vert kaki cristal, Optique, Plastique, Femme, Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 159.9}, {\"description\": \"ESS (Ato) Stellest Airwear Cz Rock diam : 65, ind. 1.59, Polycarbonate, Unifocal\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 198.9}, {\"description\": \"ESS (Ato) St\\u00e9lest Airwear Cz Rock diam : 65, ind. 1.59, Polycarbonate, Unifocal\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 198.9}], \"prescripteur_finess_number\": 291761005}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3ee0ac3b-4d77-4692-80fa-15004f8a3499_facture_ophta_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3ee0ac3b-4d77-4692-80fa-15004f8a3499_facture_ophta_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"970412664\", \"doctor_name\": \"MARTIN S\\u00e9bastien\", \"total_billed\": 69.36, \"bill_paid\": true, \"amount_paid\": 20.81, \"mandatory_coverage\": 48.55, \"complementary_coverage\": 0, \"client_part\": 20.81, \"remaining_payment\": null, \"insured_name\": \"SERY ANTHONY\", \"insured_dob\": \"23-06-2002\", \"beneficiary_name\": \"SERY ANTHONY\", \"beneficiary_dob\": \"23-06-2002\", \"invoice_date\": \"25-11-2024\", \"security_number\": \"102069781608917\", \"invoice_issuer\": \"CDS VISION CLAIRE - SAINT PIERRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Tomographie unilat\\u00e9rale ou bilat\\u00e9rale de l'oeil par scanographie \\u00e0 coh\\u00e9rence optique\", \"quantity\": null, \"date_of_service\": \"25-11-2024\", \"mandatory_coverage\": 40.77, \"amount\": 58.24}, {\"description\": \"Pachym\\u00e9trie corn\\u00e9enne\", \"quantity\": null, \"date_of_service\": \"25-11-2024\", \"mandatory_coverage\": 7.78, \"amount\": 11.12}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10004433750\", \"finess_number\": \"391848611\", \"doctor_name\": \"PROST-MAGNIN Olivier\", \"total_billed\": 44.0, \"bill_paid\": true, \"amount_paid\": 44.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 44.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PERNIN Frederic\", \"beneficiary_dob\": null, \"invoice_date\": \"05-12-2024\", \"security_number\": \"175022505624413\", \"invoice_issuer\": \"Cabinet de Tavaux\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 44}, {\"description\": \"Hors Nomenclature\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": 44}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/83c61388-dac0-4d61-bbaa-c9c032ebd681_facture_ophtalmologue__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": \"10101434131\", \"finess_number\": \"021750377\", \"doctor_name\": null, \"total_billed\": 104.93, \"bill_paid\": true, \"amount_paid\": 35.0, \"mandatory_coverage\": 48.95, \"complementary_coverage\": 20.98, \"client_part\": 35.0, \"remaining_payment\": 0, \"insured_name\": \"WILLIAM BOUCENNA\", \"insured_dob\": null, \"beneficiary_name\": \"POITE DAVID\", \"beneficiary_dob\": \"17-08-1973\", \"invoice_date\": \"07-12-2024\", \"security_number\": \"173080230402707\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"BZQK001\", \"quantity\": null, \"date_of_service\": \"07-12-2024\", \"mandatory_coverage\": 39.58, \"amount\": 56.54}, {\"description\": \"BGQP009\", \"quantity\": null, \"date_of_service\": \"07-12-2024\", \"mandatory_coverage\": 9.37, \"amount\": 26.78}], \"prescripteur_finess_number\": \"021750377\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/09aafdfc-4378-4d2f-be2a-f647d5138f5b_Ordonnance_marie_lunettes_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/09aafdfc-4378-4d2f-be2a-f647d5138f5b_Ordonnance_marie_lunettes_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"920037454\", \"finess_number\": null, \"doctor_name\": \"ALBANO Vincenza\", \"total_billed\": 700.0, \"bill_paid\": true, \"amount_paid\": 403.85, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 296.06, \"client_part\": 403.85, \"remaining_payment\": null, \"insured_name\": \"ALIDOR MARIE ROSE\", \"insured_dob\": \"15-08-1977\", \"beneficiary_name\": \"ALIDOR MARIE ROSE\", \"beneficiary_dob\": \"15-08-1977\", \"invoice_date\": \"30-10-2024\", \"security_number\": \"277089712009133\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"MAR - Tom Ford - FT5951-B- Traditionnel\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 246.0}, {\"description\": \"Zeiss - Zeiss PRG Clear 3Dve+ Pro 16\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 227.0}, {\"description\": \"Zeiss - Zeiss PRG Clear 3Dve+ Pro 16\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 227.0}], \"prescripteur_finess_number\": \"920037454\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b46e8411-29d3-44b6-ae65-55f11bf360bd_FACTURE_OCULISTE_JANE_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"10002331949\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZOPIE CAROLE\", \"total_billed\": 112.92, \"bill_paid\": true, \"amount_paid\": 50.2, \"mandatory_coverage\": 62.72, \"complementary_coverage\": 0, \"client_part\": 50.2, \"remaining_payment\": null, \"insured_name\": \"VANDERESSE JEANINE\", \"insured_dob\": \"04-08-1947\", \"beneficiary_name\": \"VANDERESSE JEANINE\", \"beneficiary_dob\": \"04-08-1947\", \"invoice_date\": \"28-11-2024\", \"security_number\": \"247089931220933\", \"invoice_issuer\": \"ZOPIE CORPETTI CAROLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"2024-11-28\", \"mandatory_coverage\": 34.52, \"amount\": 62.16}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"2024-11-28\", \"mandatory_coverage\": 10.2, \"amount\": 18.36}, {\"description\": \"ADI\", \"quantity\": null, \"date_of_service\": \"2024-11-28\", \"mandatory_coverage\": 18, \"amount\": 32.4}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin__2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 26.5, \"bill_paid\": true, \"amount_paid\": 7.95, \"mandatory_coverage\": 18.55, \"complementary_coverage\": 0, \"client_part\": 7.95, \"remaining_payment\": null, \"insured_name\": \"FATOUMATA SISSOKO\", \"insured_dob\": null, \"beneficiary_name\": \"MERCIER Dani\", \"beneficiary_dob\": \"22-11-2013\", \"invoice_date\": \"19-11-2024\", \"security_number\": \"2760799341096\", \"invoice_issuer\": \"Centre Municipal de Sant\\u00e9 SAINT-DENIS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"G\", \"quantity\": null, \"date_of_service\": \"18-11-2024\", \"mandatory_coverage\": null, \"amount\": 26.5}, {\"description\": \"G\", \"quantity\": null, \"date_of_service\": \"19-03-2024\", \"mandatory_coverage\": null, \"amount\": 26.5}], \"prescripteur_finess_number\": \"930010632\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4bfa12ee-3ee3-4e5d-bee6-9cb2b0708819_SKM_C250i24112216170_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"10100180602\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Clemence PORTE\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 11.38, \"mandatory_coverage\": 48.62, \"complementary_coverage\": 0, \"client_part\": 11.38, \"remaining_payment\": null, \"insured_name\": \"DI ROSA GIOVANNI\", \"insured_dob\": \"26-06-1961\", \"beneficiary_name\": \"DI ROSA GIOVANNI\", \"beneficiary_dob\": \"26-06-1961\", \"invoice_date\": \"31-10-2024\", \"security_number\": \"161064209506391\", \"invoice_issuer\": \"DOCTEUR CLEMENCE PORTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ADC\", \"quantity\": null, \"date_of_service\": \"31-10-2024\", \"mandatory_coverage\": 48.62, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d8203a60-049f-4f78-befd-556987e766d7_Facture_Mme_GLACON_Adeline_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"10101788791\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LECOGE REGIS\", \"total_billed\": 72.23, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": 27.23, \"complementary_coverage\": 0, \"client_part\": 45.0, \"remaining_payment\": null, \"insured_name\": \"EDERHY ADELINE\", \"insured_dob\": \"14-08-1988\", \"beneficiary_name\": \"GLACON ADELINE\", \"beneficiary_dob\": null, \"invoice_date\": \"29-10-2024\", \"security_number\": \"288087644705485\", \"invoice_issuer\": \"CABINET DU DR REGIS LECOGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 18.37, \"amount\": 48.72}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 8.86, \"amount\": 23.51}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7ac36d27-60a3-46d9-b496-a5865668fff5_20241022_facture_lunettes_zoe_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"891012783\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PEYSSONNEL Eric\", \"total_billed\": 276.0, \"bill_paid\": true, \"amount_paid\": 75.97, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 199.94, \"client_part\": 75.97, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAMERA Zo\\u00e9\", \"beneficiary_dob\": \"24-01-2018\", \"invoice_date\": \"22-10-2024\", \"security_number\": \"2851289206016\", \"invoice_issuer\": \"NOVA VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Karavan CA0152 Re 4718\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 156.0}, {\"description\": \"Mega Optic Organindividuel 1.5 Durc 60\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 60.0}, {\"description\": \"Mega Optic Organindividuel 1.6 Durc 60\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 60.0}, {\"description\": \"Articles Divers\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 24.0}], \"prescripteur_finess_number\": \"891012783\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a679270c-2360-4566-a356-dc11d0ba8fcb_2024-11-02_141241_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"771716180\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BRUYERE Elsa\", \"total_billed\": 114.03, \"bill_paid\": true, \"amount_paid\": 52.0, \"mandatory_coverage\": 43.43, \"complementary_coverage\": 18.6, \"client_part\": 52.0, \"remaining_payment\": 0.0, \"insured_name\": \"MARTINE MONIQUE\", \"insured_dob\": \"18-04-1949\", \"beneficiary_name\": \"MARTINE MONIQUE\", \"beneficiary_dob\": \"18-04-1949\", \"invoice_date\": \"02-11-2024\", \"security_number\": \"249047512019486\", \"invoice_issuer\": \"Seima-Ophtha Roissy-en-Brie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BZQK001\", \"quantity\": 1, \"date_of_service\": \"02-11-2024\", \"mandatory_coverage\": 33.52, \"amount\": 88.02}, {\"description\": \"BGQP002\", \"quantity\": 1, \"date_of_service\": \"02-11-2024\", \"mandatory_coverage\": 9.91, \"amount\": 26.01}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0be4fe84-6c9c-4a84-9435-5fe9fa700ff3_Monsieur_BURAZOVITCH_JERONIMO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": null, \"adeli_number\": \"131889925\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BUAOVITCH JERONIMO\", \"total_billed\": 108.77, \"bill_paid\": true, \"amount_paid\": 51.0, \"mandatory_coverage\": 40.44, \"complementary_coverage\": 17.33, \"client_part\": 51.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NICOLAS Regis\", \"beneficiary_dob\": \"19-11-1964\", \"invoice_date\": \"12-04-2024\", \"security_number\": \"1641171133282\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": 1, \"date_of_service\": \"12-04-2024\", \"mandatory_coverage\": 40.44, \"amount\": 51.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b5f6fab1-7920-45cd-b76e-e560077dcc65_IMG_20241022_170156_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10002305877\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BONNE Maxence\", \"total_billed\": 67.23, \"bill_paid\": true, \"amount_paid\": 40.0, \"mandatory_coverage\": 27.23, \"complementary_coverage\": 0.0, \"client_part\": 40.0, \"remaining_payment\": null, \"insured_name\": \"RENON CAROLINE\", \"insured_dob\": null, \"beneficiary_name\": \"RENON CAROLINE\", \"beneficiary_dob\": null, \"invoice_date\": \"03-10-2024\", \"security_number\": \"202116274402119\", \"invoice_issuer\": \"ARTOIS VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"03-10-2024\", \"mandatory_coverage\": 18.37, \"amount\": 26.98}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"03-10-2024\", \"mandatory_coverage\": 8.86, \"amount\": 13.02}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/01f6cc06-5720-4836-a490-1521aa928049_image_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"931854798\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIERRE OLIVIER BARALE\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 8.32, \"mandatory_coverage\": 12.48, \"complementary_coverage\": null, \"client_part\": 8.32, \"remaining_payment\": null, \"insured_name\": \"SAMRETH ALAIN\", \"insured_dob\": \"24-02-1966\", \"beneficiary_name\": \"SAMRETH ALAIN\", \"beneficiary_dob\": \"24-02-1966\", \"invoice_date\": \"23-09-2024\", \"security_number\": \"166029923408466\", \"invoice_issuer\": \"Dr PIERRE OLIVIER BARALE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 AMY 8.00\", \"quantity\": 1, \"date_of_service\": \"23-09-2024\", \"mandatory_coverage\": null, \"amount\": 20.8}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"931854798\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PIERRE OLIVIER BARALE\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 43.28, \"mandatory_coverage\": 26.72, \"complementary_coverage\": null, \"client_part\": 43.28, \"remaining_payment\": null, \"insured_name\": \"SAMRETH ALAIN\", \"insured_dob\": \"24-02-1966\", \"beneficiary_name\": \"SAMRETH ALAIN\", \"beneficiary_dob\": \"24-02-1966\", \"invoice_date\": \"23-09-2024\", \"security_number\": \"166029923408466\", \"invoice_issuer\": \"Dr PIERRE OLIVIER BARALE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BGQP002 Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": 1, \"date_of_service\": \"23-09-2024\", \"mandatory_coverage\": null, \"amount\": 56.93}, {\"description\": \"BDQP003 Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": 1, \"date_of_service\": \"23-09-2024\", \"mandatory_coverage\": null, \"amount\": 13.07}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10004430996\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NANDU ALINE\", \"total_billed\": 69.36, \"bill_paid\": true, \"amount_paid\": 20.81, \"mandatory_coverage\": 48.55, \"complementary_coverage\": 0.0, \"client_part\": 20.81, \"remaining_payment\": null, \"insured_name\": \"GERAC JEAN-FREDERIC P\", \"insured_dob\": null, \"beneficiary_name\": \"GERAC JEAN-FREDERIC P\", \"beneficiary_dob\": null, \"invoice_date\": \"01-10-2024\", \"security_number\": \"178089542802730\", \"invoice_issuer\": \"OPHTALEU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 58.24}, {\"description\": \"R\\u00e9vision du contact\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 11.12}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c7bbe834-632d-4fb6-8e24-c9c5caf488bc_IMG_4966_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6322cbfc-1ddc-4208-a524-8e16da3c31a5_Facture_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"621044924\", \"finess_number\": null, \"doctor_name\": \"Carballet Laurent\", \"total_billed\": 233.2, \"bill_paid\": true, \"amount_paid\": 233.2, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 60.0, \"client_part\": 173.11, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sarah BONIFACE\", \"beneficiary_dob\": \"28-02-2008\", \"invoice_date\": \"08-10-2024\", \"security_number\": \"179096204109316\", \"invoice_issuer\": \"Krys Beaurains - Tilloy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Classic Line - 12, rue Marcellin - FR 93500\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 103.2}, {\"description\": \"Optiswiss France Sarl-43,, rue de St-Louis - FR 94300\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"Optiplas DUAL+UV 150 Super antireflet\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Optiswiss France Sarl-43, r. de St-Louis-FR-68330\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"Optiplas DUAL+UV 150 \", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": \"62269773\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/beb224ad-6c43-4abc-b80c-d438d00a77c2_Facture_TP_Client_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"691715411\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CENTRE KLEBER DR LEREUIL\", \"total_billed\": 1100, \"bill_paid\": true, \"amount_paid\": 1100, \"mandatory_coverage\": 0.09, \"complementary_coverage\": null, \"client_part\": 1100, \"remaining_payment\": null, \"insured_name\": \"BAJARD JACQUES\", \"insured_dob\": \"03-03-1944\", \"beneficiary_name\": \"BAJARD JACQUES\", \"beneficiary_dob\": \"03-03-1944\", \"invoice_date\": \"03-10-2024\", \"security_number\": \"144038816000174\", \"invoice_issuer\": \"Optique des Createurs\", \"currency\": \"EUR\", \"items\": [{\"description\": \"THEO - MILLE+88\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 244.0}, {\"description\": \"ESSILOR - Varus Physio 3 Orma\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 428.0}, {\"description\": \"ESSILOR - Varus Physio 3 Orma\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 428.0}], \"prescripteur_finess_number\": \"692631823\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/aa46e86a-870c-4d0c-b100-b004535694fe_Factures_lentilles_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"651001638\", \"rpps_number\": \"10002823275\", \"finess_number\": null, \"doctor_name\": \"LEPOUTERE Bruno\", \"total_billed\": 55.8, \"bill_paid\": true, \"amount_paid\": 19.6, \"mandatory_coverage\": 0.0, \"complementary_coverage\": 36.2, \"client_part\": 19.6, \"remaining_payment\": null, \"insured_name\": \"DUCASSE LORIE\", \"insured_dob\": \"15-06-2001\", \"beneficiary_name\": \"DUCASSE LORIE\", \"beneficiary_dob\": \"15-06-2001\", \"invoice_date\": \"01-10-2024\", \"security_number\": \"201066544011113\", \"invoice_issuer\": \"LES OPTICIS MUTUALISTES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CooperVision - BIOFINITY - 14/8.6 (Mensuelles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 27.9}, {\"description\": \"CooperVision - BIOFINITY - 14/8.6 (Mensuelles)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 27.9}], \"prescripteur_finess_number\": \"652600305\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9471b955-4dae-413a-9453-b846f0224a6d_soins_ruth_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"971312376\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ruth ESTRIPLET\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"NOEL Aalynscia\", \"insured_dob\": \"12-04-2019\", \"beneficiary_name\": \"NOEL Aalynscia\", \"beneficiary_dob\": \"12-04-2019\", \"invoice_date\": \"30-01-2024\", \"security_number\": \"297049730297616\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"CS\", \"quantity\": null, \"date_of_service\": \"30-01-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"131889941\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SAND THIERRY\", \"total_billed\": 20.8, \"bill_paid\": true, \"amount_paid\": 8.32, \"mandatory_coverage\": 12.48, \"complementary_coverage\": null, \"client_part\": 8.32, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BARBERIS Philippe\", \"beneficiary_dob\": \"31-12-1973\", \"invoice_date\": \"23-09-2024\", \"security_number\": \"1731213054047\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 12.48, \"amount\": 20.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e7a119e4-94fb-46c5-ae80-07f96e6aeecb_Screenshot_2024-09-23-12-29-45-534_com.google.android.apps.docs_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"942817354\", \"rpps_number\": null, \"finess_number\": \"942614405\", \"doctor_name\": null, \"total_billed\": 55.88, \"bill_paid\": true, \"amount_paid\": 55.88, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Antoine GODET\", \"beneficiary_dob\": null, \"invoice_date\": \"02-07-2024\", \"security_number\": null, \"invoice_issuer\": \"L COMMERCE SNC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acuvue Oasys for astigmatism Corrections OEIL GAUCHE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.94}, {\"description\": \"Acuvue Oasys for astigmatism Corrections OEIL DROIT\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.94}], \"prescripteur_finess_number\": \"942614405\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 147, \"bill_paid\": true, \"amount_paid\": 86.1, \"mandatory_coverage\": 60.9, \"complementary_coverage\": 86.1, \"client_part\": 0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gabriel Swertvaeghe\", \"beneficiary_dob\": null, \"invoice_date\": \"18-09-2024\", \"security_number\": \"191101436624355\", \"invoice_issuer\": \"Lunettes Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LPT Supply Chain (6 Rue de la branche, 75001 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30}, {\"description\": \"LPT Supply Chain (6 Rue de la branche, 75001 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"GlassProtect Basic\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"Sous-total Offre 100% Sante\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42, \"amount\": 42}], \"prescripteur_finess_number\": \"752619379\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9901adc0-96d4-4cfd-a028-8286fd1f7841_Fre_Ophtalmo_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"311068670\", \"rpps_number\": \"10002863412\", \"finess_number\": null, \"doctor_name\": \"THEVENOT FRANCOISE\", \"total_billed\": 70.69, \"bill_paid\": true, \"amount_paid\": 21.2, \"mandatory_coverage\": 49.49, \"complementary_coverage\": 0, \"client_part\": 21.2, \"remaining_payment\": null, \"insured_name\": \"BECRET JACQUES\", \"insured_dob\": null, \"beneficiary_name\": \"BECRET JACQUES\", \"beneficiary_dob\": null, \"invoice_date\": \"11-09-2024\", \"security_number\": \"166101106910827\", \"invoice_issuer\": \"CABINET DU FRANCOIS THEVENOT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 39.58, \"amount\": 58.54}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 9.91, \"amount\": 14.15}], \"prescripteur_finess_number\": null, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"691714190\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"PERRILLAT-COLLOMB NICOLAS\", \"total_billed\": 76.0, \"bill_paid\": true, \"amount_paid\": 48.77, \"mandatory_coverage\": 27.23, \"complementary_coverage\": null, \"client_part\": 48.77, \"remaining_payment\": null, \"insured_name\": \"PERNA Isabelle\", \"insured_dob\": \"05-07-1975\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"24-01-2024\", \"security_number\": \"2750769238015\", \"invoice_issuer\": \"Cabinet Medical Ophtalmologique Point Vision Lyon-Montrochet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3d649b61-08a4-4823-9745-ec9679060668_Numerisation_20240917_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 12.78, \"bill_paid\": true, \"amount_paid\": 3.59, \"mandatory_coverage\": 5.51, \"complementary_coverage\": 3.36, \"client_part\": 3.59, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HUITOREL JUSTIN\", \"beneficiary_dob\": null, \"invoice_date\": \"12-09-2024\", \"security_number\": null, \"invoice_issuer\": \"PHARMACIE ZERBIB - GOLBOL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"VISMED MULTI S ocul st\\u00e9r lubrifi\\u00e9e FL/15ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 5.51, \"amount\": 12.78}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a4949824-1296-4d97-a46a-6173dee1f3ad_FACTURE_LENTILLES_09_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 81.7, \"bill_paid\": true, \"amount_paid\": 81.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie-Line BERNARD\", \"beneficiary_dob\": null, \"invoice_date\": \"13-09-2024\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix Plus Hydraglyde (6) (Sph\\u00e9rique) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 27.9}, {\"description\": \"Air Optix plus Hydraglyde for Astigmatism (6) (Torique) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}], \"prescripteur_finess_number\": 752617001}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/93ab49a6-e81d-43be-bcd0-956a68d55ada_CamScanner_12-09-2024_10.57_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"JEAN MARC MALIK\", \"beneficiary_dob\": null, \"invoice_date\": \"11-09-2024\", \"security_number\": null, \"invoice_issuer\": \"Diana Tabares Optica SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LENT PROGRESIVO DIGITAL CON TRANSITONS Y CRIZAL\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 1900000}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/05ccbd7f-e8ad-499f-9a0f-68ecdb8751d2_BLANCARD_MATHIEU_FACT_OPHTALMA_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"971745583\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BAUDOT AUDREY\", \"total_billed\": 65.0, \"bill_paid\": true, \"amount_paid\": 32.29, \"mandatory_coverage\": 32.71, \"complementary_coverage\": null, \"client_part\": 32.29, \"remaining_payment\": null, \"insured_name\": \"BLANCARD MATHIEU\", \"insured_dob\": \"11-01-1998\", \"beneficiary_name\": \"BLANCARD MATHIEU\", \"beneficiary_dob\": \"11-01-1998\", \"invoice_date\": \"12-09-2024\", \"security_number\": \"1980106088382\", \"invoice_issuer\": \"BAUDOT AUDREY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 ATM 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 32.71, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5ca60afd-d678-4a35-8385-38076bfd5f1f_BLANCARD_JEAN_LOUIS_FCT_OPHTALMO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"971745583\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BAUDOT AUDREY\", \"total_billed\": 60.0, \"bill_paid\": true, \"amount_paid\": 38.83, \"mandatory_coverage\": 21.17, \"complementary_coverage\": null, \"client_part\": 38.83, \"remaining_payment\": null, \"insured_name\": \"BLANCARD JEAN-LOUIS\", \"insured_dob\": \"11-08-1960\", \"beneficiary_name\": \"BLANCARD JEAN-LOUIS\", \"beneficiary_dob\": \"11-08-1960\", \"invoice_date\": \"12-09-2024\", \"security_number\": \"1600899353350\", \"invoice_issuer\": \"Madame BAUDOT AUDREY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 ATM 1\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 21.17, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/626c99a4-986b-401f-b91e-9b6c0f15cce6_Commande_n100691223_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 39.51, \"bill_paid\": true, \"amount_paid\": 39.51, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Elise Theveny\", \"beneficiary_dob\": null, \"invoice_date\": \"09-09-2024\", \"security_number\": null, \"invoice_issuer\": \"Optik Kaufman SA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Biofinity Oeil Sph\\u00e9rie Diam\\u00e9tre Rayon\", \"quantity\": 2, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 43.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": \"10003394813\", \"finess_number\": null, \"doctor_name\": \"MILIN Anne v\\u00e9ronique\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NOURDINE Angel\", \"beneficiary_dob\": null, \"invoice_date\": \"28-09-2024\", \"security_number\": \"191039939703035\", \"invoice_issuer\": \"Cabinet d'Aix\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Actes - produits\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.8}, {\"description\": \"Hors Nomenclature\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 29.2}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 10.9, \"bill_paid\": true, \"amount_paid\": 10.9, \"mandatory_coverage\": 0, \"complementary_coverage\": 0, \"client_part\": 10.9, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"NOURDINE Angel\", \"beneficiary_dob\": null, \"invoice_date\": \"02-09-2024\", \"security_number\": null, \"invoice_issuer\": \"selarl PHARMACIE LES MILLES\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SKIACOL COLLY\", \"quantity\": 1, \"date_of_service\": \"02-09-2024\", \"mandatory_coverage\": 0, \"amount\": 10.9}], \"prescripteur_finess_number\": \"13185964\"}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/84624e2b-50e3-42cc-8ced-76dc83475a19_LUNETTES_OPTICAL_CENTER_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 474.4, \"bill_paid\": true, \"amount_paid\": 284.34, \"mandatory_coverage\": 0.06, \"complementary_coverage\": 190.0, \"client_part\": 284.34, \"remaining_payment\": null, \"insured_name\": \"MAGUIN CELINE\", \"insured_dob\": \"02-01-1977\", \"beneficiary_name\": \"MAGUIN CELINE\", \"beneficiary_dob\": \"02-01-1977\", \"invoice_date\": \"26-07-2024\", \"security_number\": \"277010836301161\", \"invoice_issuer\": \"OPTICAL CENTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BBGR - Nikon P.Definit P 14 1.60 PurBlueUV NDBUV - 65/70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 190.0}, {\"description\": \"BBGR - Nikon P.Definit P 14 1.60 PurBlueUV NDBUV - 65/70\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 190.0}, {\"description\": \"BBGR - PACK CONF L INFINI HD 1.5 Serenity ARMC - 65/65\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.2}, {\"description\": \"BBGR - PACK CONF L INFINI HD 1.5 Serenity ARMC - 65/65\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.2}], \"prescripteur_finess_number\": \"542667274\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 187, \"bill_paid\": true, \"amount_paid\": 114.7, \"mandatory_coverage\": 72.3, \"complementary_coverage\": 114.7, \"client_part\": 0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Amelie Ounzerfi\", \"beneficiary_dob\": null, \"invoice_date\": \"03-08-2024\", \"security_number\": \"292049134530694\", \"invoice_issuer\": \"Lunettes Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30}, {\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 9.45, \"amount\": 52.5}, {\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 9.45, \"amount\": 52.5}, {\"description\": \"Suppl\\u00e9mentation\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6, \"amount\": 10}, {\"description\": \"Suppl\\u00e9mentation\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42, \"amount\": 42}, {\"description\": \"Protection\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0}], \"prescripteur_finess_number\": \"942631938\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 167.0, \"bill_paid\": true, \"amount_paid\": 98.3, \"mandatory_coverage\": 68.7, \"complementary_coverage\": 98.3, \"client_part\": 0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emnah Ounzerfi\", \"beneficiary_dob\": null, \"invoice_date\": \"03-08-2024\", \"security_number\": \"292049134530694\", \"invoice_issuer\": \"Lunettes Pour Tous\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30}, {\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 7.65, \"amount\": 42.5}, {\"description\": \"LPT Supply Chain (6 Rue de braque, 75003 Paris)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 7.65, \"amount\": 42.5}, {\"description\": \"Glass Protect Basic\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0}, {\"description\": \"Suppl\\u00e9ment: Preparation d'adaptation\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.0, \"amount\": 10.0}, {\"description\": \"Suppl\\u00e9ment:\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 42, \"amount\": 42}], \"prescripteur_finess_number\": \"942631938\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 24.98, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LEMPEREUR SYLVIE\", \"insured_dob\": \"25-03-1972\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"272037654044830\", \"invoice_issuer\": \"MGP SANTE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PARCOURS COORDONNE - ACCES DIRECT AUTORISE\", \"quantity\": null, \"date_of_service\": \"30-07-2024\", \"mandatory_coverage\": null, \"amount\": 28.29}, {\"description\": \"PARCOURS COORDONNE - ACCES DIRECT AUTORISE\", \"quantity\": null, \"date_of_service\": \"30-07-2024\", \"mandatory_coverage\": null, \"amount\": 13.12}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"641757828\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"MASSE-NAVETTE SOPHIE\", \"total_billed\": 110.48, \"bill_paid\": true, \"amount_paid\": 53.0, \"mandatory_coverage\": 57.48, \"complementary_coverage\": null, \"client_part\": 53.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GONZALEZ Christophe\", \"beneficiary_dob\": \"29-05-1976\", \"invoice_date\": \"13-08-2024\", \"security_number\": \"1760564422045\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/484d05c2-b89b-4ec0-8968-9ae6a5360f6a_DetailDesPaiements_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/484d05c2-b89b-4ec0-8968-9ae6a5360f6a_DetailDesPaiements_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"131025397\", \"doctor_name\": \"MAHFUD ALI Abdurraof\", \"total_billed\": 165.0, \"bill_paid\": true, \"amount_paid\": 60.0, \"mandatory_coverage\": 18.9, \"complementary_coverage\": 86.1, \"client_part\": 60, \"remaining_payment\": null, \"insured_name\": \"BARRET BRUNO\", \"insured_dob\": null, \"beneficiary_name\": \"BARRET LENA\", \"beneficiary_dob\": \"14-08-2017\", \"invoice_date\": \"24-02-2024\", \"security_number\": \"18208130559263\", \"invoice_issuer\": \"Jolies Mirettes\", \"currency\": \"EUR\", \"items\": [{\"description\": \"KOPTICAL RAC0 Unif. 1.5 SAR ST 24 Org 65\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"KOPTICAL RAC0 Unif. 1.5 SAR ST 24 Org 65\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.75, \"amount\": 37.5}, {\"description\": \"KOPTICAL WIKIBOOM 1503 Noir brillant 46/19 (1)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 5.4, \"amount\": 30}, {\"description\": \"KOPTICAL K 15 ST 24 Org 65\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"KOPTICAL K 15 ST 24 Org 65\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"GVO NANO CAMPER FRAMBOISE 44 (2)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": \"131025397\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"NAULEAU Elodie\", \"total_billed\": 22.1, \"bill_paid\": true, \"amount_paid\": 8.84, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 8.84, \"remaining_payment\": 0, \"insured_name\": \"BOUZID Houda\", \"insured_dob\": null, \"beneficiary_name\": \"RAVIER Nuria\", \"beneficiary_dob\": \"19-06-2008\", \"invoice_date\": \"27-06-2023\", \"security_number\": \"2711099352064\", \"invoice_issuer\": \"CENTRE ACCES VISION TOURS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acuite visuelle et refrac AMY\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 22.1}], \"prescripteur_finess_number\": \"370015950\", \"is_rotated\": true}, {\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FRAGUAS Jose Luis\", \"total_billed\": 20.83, \"bill_paid\": true, \"amount_paid\": 6.25, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 6.25, \"remaining_payment\": 0, \"insured_name\": \"BOUZID Houda\", \"insured_dob\": null, \"beneficiary_name\": \"RAVIER Nuria\", \"beneficiary_dob\": \"19-06-2008\", \"invoice_date\": \"27-06-2023\", \"security_number\": \"2711099352064\", \"invoice_issuer\": \"CENTRE ACCES VISION TOURS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ADI\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 20.83}], \"prescripteur_finess_number\": \"370015950\", \"is_rotated\": true}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"10101552940\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LABoudi Karim\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 32.24, \"mandatory_coverage\": 87.76, \"complementary_coverage\": 0, \"client_part\": 32.24, \"remaining_payment\": null, \"insured_name\": \"D'ARGY BEATRICE\", \"insured_dob\": null, \"beneficiary_name\": \"D'ARGY BEATRICE\", \"beneficiary_dob\": null, \"invoice_date\": \"21-05-2024\", \"security_number\": \"28104605716863\", \"invoice_issuer\": \"V3V\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"2024-05-21\", \"mandatory_coverage\": 56.54, \"amount\": 83.97}, {\"description\": \"ADI\", \"quantity\": null, \"date_of_service\": \"2024-05-21\", \"mandatory_coverage\": 10.42, \"amount\": 15.23}, {\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": \"2024-05-21\", \"mandatory_coverage\": 20.8, \"amount\": 20.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/347d144b-1ebf-4d96-8b25-a17f7f04c736_fact_ophtalmo_06.08.24_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"761708361\", \"rpps_number\": \"10001929800\", \"finess_number\": null, \"doctor_name\": \"J\\u00c9R\\u00d4ME MASSY\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 37, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 37, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jennifer AUBERT\", \"beneficiary_dob\": \"02-12-1976\", \"invoice_date\": \"06-08-2024\", \"security_number\": \"2761288160017\", \"invoice_issuer\": \"CENTRE MEDICAL OPHTALMOLOGlQUE POINT VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation ophtalmologique\", \"quantity\": 1, \"date_of_service\": \"06-08-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/96be3553-7261-4698-bf20-88ca5c826889_ReleveMensuel_Mai_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/96be3553-7261-4698-bf20-88ca5c826889_ReleveMensuel_Mai_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 154.04, \"bill_paid\": true, \"amount_paid\": 154.04, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DISSEZ PATRICK\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION\", \"quantity\": 1, \"date_of_service\": \"16-05-2024\", \"mandatory_coverage\": null, \"amount\": 6.9}, {\"description\": \"CONSULTATION\", \"quantity\": 1, \"date_of_service\": \"16-05-2024\", \"mandatory_coverage\": null, \"amount\": 3.95}, {\"description\": \"HONORAIRES HOSPITALIERS\", \"quantity\": 1, \"date_of_service\": \"17-05-2024\", \"mandatory_coverage\": null, \"amount\": 144.95}, {\"description\": \"PHARMACIE\", \"quantity\": 1, \"date_of_service\": \"18-04-2024\", \"mandatory_coverage\": null, \"amount\": 3.02}, {\"description\": \"PHARMACIE\", \"quantity\": 1, \"date_of_service\": \"18-04-2024\", \"mandatory_coverage\": null, \"amount\": 1.76}, {\"description\": \"HONORAIRES PHARMCIEN\", \"quantity\": 1, \"date_of_service\": \"18-04-2024\", \"mandatory_coverage\": null, \"amount\": 0.51}, {\"description\": \"APPAREIL\", \"quantity\": 1, \"date_of_service\": \"18-04-2024\", \"mandatory_coverage\": null, \"amount\": 2.32}, {\"description\": \"APPAREIL\", \"quantity\": 1, \"date_of_service\": \"18-04-2024\", \"mandatory_coverage\": null, \"amount\": 1.73}, {\"description\": \"MONTURE\", \"quantity\": 1, \"date_of_service\": \"03-05-2024\", \"mandatory_coverage\": null, \"amount\": 149.97}, {\"description\": \"VERRES\", \"quantity\": 1, \"date_of_service\": \"03-05-2024\", \"mandatory_coverage\": null, \"amount\": 220.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f015c1fb-ac12-4d99-bbf9-8dc53bcf1ada_Facture_dentaire_du_29-07_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"330059932\", \"doctor_name\": \"QUESADA Teresa\", \"total_billed\": 111.97, \"bill_paid\": true, \"amount_paid\": 44.79, \"mandatory_coverage\": 67.18, \"complementary_coverage\": 0, \"client_part\": 44.79, \"remaining_payment\": null, \"insured_name\": \"AHAMADI ABDOU MANSONR\", \"insured_dob\": \"29-01-1994\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"29-07-2024\", \"security_number\": \"194019939705525\", \"invoice_issuer\": \"CSD DE L'ABSBD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Exr\\u00e9sion de la pulpe vivante d'une molaire permanente\", \"quantity\": 1, \"date_of_service\": \"29-07-2024\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Radiographie-introbuccale r\\u00e9trobulcale par voie endodonte\", \"quantity\": 1, \"date_of_service\": \"29-07-2024\", \"mandatory_coverage\": null, \"amount\": 11.97}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 39.7, \"bill_paid\": true, \"amount_paid\": 39.7, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FIONA GERVAISE DE BIAGIONI\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2024\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ACUVUE Oasys (6) (Lentilles sph\\u00e9riques) Oeil droit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 18.9}, {\"description\": \"ACUVUE Oasys (6) (Lentilles sph\\u00e9riques) Oeil gauche\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 18.9}, {\"description\": \"Acuvue RevitaLens 100ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 4.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 2.9}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -2.9}, {\"description\": \"R\\u00e9duction produit(s)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -3.0}], \"prescripteur_finess_number\": \"752617001\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"10100820405\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ADDOU-REGNARD MANAR\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": 40.8, \"mandatory_coverage\": 39.2, \"complementary_coverage\": 0, \"client_part\": 40.8, \"remaining_payment\": null, \"insured_name\": \"FOURNIER GHISLAINE\", \"insured_dob\": \"28-01-1957\", \"beneficiary_name\": \"FOURNIER GHISLAINE\", \"beneficiary_dob\": \"28-01-1957\", \"invoice_date\": \"25-07-2024\", \"security_number\": \"257017817405128\", \"invoice_issuer\": \"DOCTEUR ADDOU-REGNARD ET KAMAMI-LEVY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 19.8, \"amount\": 43.87}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 6.92, \"amount\": 15.33}, {\"description\": \"AMY8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 12.48, \"amount\": 20.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"101721165\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ABBAS MAZIN\", \"total_billed\": 127.77, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": 57.77, \"complementary_coverage\": null, \"client_part\": 70.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEROY Joel\", \"beneficiary_dob\": \"13-08-1946\", \"invoice_date\": \"04-07-2024\", \"security_number\": \"1460810387057\", \"invoice_issuer\": \"Centre M\\u00e9dical Ophtalmologique Point Vision Troyes\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"CCAM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fa95b732-7a88-4fea-a8e7-9975dbbc69a8_Facture_0122202407740700218_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"910110014\", \"doctor_name\": \"CH D ARPAJON\", \"total_billed\": 238.03, \"bill_paid\": true, \"amount_paid\": 118.0, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 119.94, \"client_part\": 118.0, \"remaining_payment\": null, \"insured_name\": \"EDGARD Nelson\", \"insured_dob\": null, \"beneficiary_name\": \"EDGARD Nelson\", \"beneficiary_dob\": null, \"invoice_date\": \"04-07-2024\", \"security_number\": \"1 73 11 99 341 076\", \"invoice_issuer\": \"G\\u00e9n\\u00e9rale d'Optique BRETAGNY SUR ORGE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Monture optique Nylor - RAY-BAN RX6513 3135 MATTE GUMETAL ON GUNMETAL (TVA 20.00%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 113.03}, {\"description\": \"Verre D - Nikon Sph 1.60 SCBUV 71-/BBG (TVA 20.00%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 54.2}, {\"description\": \"Slim / OptiSlim (TVA 20.00%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 0.0}, {\"description\": \"Verre G - Nikon Sph 1.60 SCBUV 71-/BBG (TVA 20.00%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 50.8}, {\"description\": \"Slim / OptiSlim (TVA 20.00%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"Contrat Partenaire Adulte (TVA 20.00%)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": \"912604659\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/72453850-922f-4886-b687-93376e00ba68_Frais_Ophtalmologue_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ab281836-3881-4ac0-a920-f886d41e71b3_facture_Axium_du_070624_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ab281836-3881-4ac0-a920-f886d41e71b3_facture_Axium_du_070624_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 1760.72, \"bill_paid\": true, \"amount_paid\": 585.0, \"mandatory_coverage\": 1175.72, \"complementary_coverage\": null, \"client_part\": 585.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PRICART Jean-Marc\", \"beneficiary_dob\": \"25-04-1963\", \"invoice_date\": \"11-06-2024\", \"security_number\": \"163042903922584\", \"invoice_issuer\": \"CLINIQUE AXIUM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"C CHS\", \"quantity\": 1, \"date_of_service\": \"07-06-2024\", \"mandatory_coverage\": 786.36, \"amount\": 786.36}, {\"description\": \"PAT\", \"quantity\": 1, \"date_of_service\": \"07-06-2024\", \"mandatory_coverage\": null, \"amount\": 24.0}, {\"description\": \"IMP\", \"quantity\": 1, \"date_of_service\": \"07-06-2024\", \"mandatory_coverage\": null, \"amount\": 101.0}, {\"description\": \"ADC\", \"quantity\": 1, \"date_of_service\": \"07-06-2024\", \"mandatory_coverage\": 271.7, \"amount\": 671.7}, {\"description\": \"ADA\", \"quantity\": 1, \"date_of_service\": \"07-06-2024\", \"mandatory_coverage\": 177.66, \"amount\": 177.66}], \"prescripteur_finess_number\": \"130810740\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7ef7306d-400e-4a56-b31c-02237212c470_Scan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SEIFEDDINE DAVID\", \"total_billed\": 72.23, \"bill_paid\": true, \"amount_paid\": 45.0, \"mandatory_coverage\": 27.23, \"complementary_coverage\": null, \"client_part\": 45.0, \"remaining_payment\": null, \"insured_name\": \"GUMUS EROL\", \"insured_dob\": \"18-08-1993\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"28-03-2024\", \"security_number\": \"1930838544079\", \"invoice_issuer\": \"SEIFEDDINE DAVID\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Actes techniques m\\u00e9dicaux (hors imaginaire)\", \"quantity\": null, \"date_of_service\": \"28-03-2024\", \"mandatory_coverage\": null, \"amount\": 56.24}, {\"description\": \"Actes techniques m\\u00e9dicaux (hors imaginaire)\", \"quantity\": null, \"date_of_service\": \"28-03-2024\", \"mandatory_coverage\": null, \"amount\": 15.99}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9494858a-65e3-425d-a452-17d4300f0115_demande_de_remboursement_mgp_mutuelle_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d63b7f97-36f9-4f4e-aa4d-64e2efb17678_Mon_scan_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Youness LAMARI\", \"total_billed\": 88.9, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": 50.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAHTOUT Oualid\", \"beneficiary_dob\": null, \"invoice_date\": \"16-07-2024\", \"security_number\": null, \"invoice_issuer\": \"CMO POINT VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"16-07-2024\", \"mandatory_coverage\": null, \"amount\": 88.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a9d9caa3-cba1-4111-9ead-658ced5e6cbc_Quittance_N362_MVOIR_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"101780013\", \"doctor_name\": \"PREVOT ERIC\", \"total_billed\": 664.0, \"bill_paid\": true, \"amount_paid\": 249.0, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 414.91, \"client_part\": 249.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DA SILVA Antonio\", \"beneficiary_dob\": \"24-07-1970\", \"invoice_date\": \"12-07-2024\", \"security_number\": \"1700764445237\", \"invoice_issuer\": \"M'VOIR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"MAF Lacoste - L2707 - 53/15 - Blue/Green, Optique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 169.0}, {\"description\": \"MEG Kod Unique Dro Hd 1.5 Totalclear diam 65/70 - 1.5 Organic, Progressif\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 247.5}, {\"description\": \"MEG Kod Unique Dro Hd 1.5 Totalclear diam 65/70 - 1.5 Organic, Progressif\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 247.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c516575f-37f9-4c9f-bfdc-4947b4a9d9d9_MARCMANSUY_Facture_ophtalmo_JF_FAURE_24062024_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FAURE JEAN-FRANCOIS\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 76.57, \"mandatory_coverage\": 43.43, \"complementary_coverage\": null, \"client_part\": 76.57, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARC LIVIER\", \"beneficiary_dob\": \"18-12-1994\", \"invoice_date\": \"24-06-2024\", \"security_number\": \"1941275113686\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"24-06-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Actes techniques m\\u00e9dicaux (hors imagerie)\", \"quantity\": null, \"date_of_service\": \"24-06-2024\", \"mandatory_coverage\": null, \"amount\": 40.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/57188e7d-280a-4f94-9d7d-6af62ede30ed_lunettes_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"651012130\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VIORRAIN Ellette\", \"total_billed\": 95.0, \"bill_paid\": true, \"amount_paid\": 47.12, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAPLACE Joel\", \"beneficiary_dob\": null, \"invoice_date\": \"11-06-2024\", \"security_number\": null, \"invoice_issuer\": \"Optalmologe Pyr\\u00e9n\\u00e9es Bigorr\\u00e9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Tomographie unilat\\u00e9rale ou bilat\\u00e9rale de l'oeil par scanographie \\u00e0 coh\\u00e9rence optique\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 95.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2f0d42d8-930e-46cb-9222-f421480516db_IMG_9332_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 156.0, \"bill_paid\": true, \"amount_paid\": 156.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AMOROS Manuel\", \"beneficiary_dob\": null, \"invoice_date\": \"01-06-2024\", \"security_number\": null, \"invoice_issuer\": \"SARL LILTI VISION\", \"currency\": \"EUR\", \"items\": [{\"description\": \"OPH OPHTALMIC HR 1 DAY 90L, diam : 14.1, ray : 8.6, Souple, Journalier, Ren: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 78.0}, {\"description\": \"OPH OPHTALMIC HR 1 DAY 90L, diam : 14.1, ray : 8.6, Souple, Journalier, Ren: 1 jour (par 90)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 78.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0d39ace6-2c29-4987-a5c8-ca0ec40d3fd4_Facture_consultation_ophtalmologique_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"1317525453\", \"doctor_name\": \"Sacha GUEUNOUN\", \"total_billed\": 65.1, \"bill_paid\": true, \"amount_paid\": 49.0, \"mandatory_coverage\": 16.1, \"complementary_coverage\": 49.0, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Manuel Liminana\", \"beneficiary_dob\": \"25-09-1982\", \"invoice_date\": \"16-05-2024\", \"security_number\": \"182091305589666\", \"invoice_issuer\": \"Sacha GUEUNOUN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"TC 1\", \"quantity\": 1, \"date_of_service\": \"16-05-2024\", \"mandatory_coverage\": 16.1, \"amount\": 65.1}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/23892d21-d122-4c24-a278-a979cbfcd526_facture_ophtalmo_Louise_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10107362989\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VEIGA DE LA JARA CARLOS\", \"total_billed\": 61.0, \"bill_paid\": true, \"amount_paid\": 32.02, \"mandatory_coverage\": 28.98, \"complementary_coverage\": 0, \"client_part\": 31.02, \"remaining_payment\": null, \"insured_name\": \"LARRIEU MARILYS\", \"insured_dob\": null, \"beneficiary_name\": \"SY LOUISE\", \"beneficiary_dob\": null, \"invoice_date\": \"05-06-2024\", \"security_number\": \"286046443003106\", \"invoice_issuer\": \"BEARN OPHTALMOS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 19.8, \"amount\": 41.67}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 9.18, \"amount\": 19.33}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4f2c784c-f3a3-4ffa-a7a6-d9ab9b905161_facturelentilles_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"351042767 \", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LANGANAY Isabelle\", \"total_billed\": 641.8, \"bill_paid\": true, \"amount_paid\": 594.42, \"mandatory_coverage\": 47.38, \"complementary_coverage\": 0, \"client_part\": 594.42, \"remaining_payment\": null, \"insured_name\": \"SCHNEBELEN JEAN PIERRE\", \"insured_dob\": \"12-11-1950\", \"beneficiary_name\": \"SCHNEBELEN JEAN PIERRE\", \"beneficiary_dob\": \"12-11-1950\", \"invoice_date\": \"17-05-2024\", \"security_number\": \"1501121231078\", \"invoice_issuer\": \"LPO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"LENTILLE DE CONTACT FORTE ANNUEL PAR OEIL PAR APPAREIL\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": 23.69, \"amount\": 320.9}, {\"description\": \"LENTILLE DE CONTACT FORTE ANNUEL PAR OEIL PAR APPAREIL\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": 23.69, \"amount\": 320.9}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/64aaff25-d506-4738-8e41-d49c0ed339ba_Recu_2024-05-24_130512_-_GODEFROY_Steevens_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fe965a4b-4eab-407e-8b44-d80fca74c489_Dde_remb_MGP_goutte_occ_Manon_Gouez_Cardon_du_22_05_2024_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fe965a4b-4eab-407e-8b44-d80fca74c489_Dde_remb_MGP_goutte_occ_Manon_Gouez_Cardon_du_22_05_2024_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Pharmacie\", \"adeli_number\": \"240038226\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 19.63, \"bill_paid\": true, \"amount_paid\": 3.59, \"mandatory_coverage\": 16.04, \"complementary_coverage\": 0, \"client_part\": 3.59, \"remaining_payment\": null, \"insured_name\": \"CARDON MAGALI\", \"insured_dob\": null, \"beneficiary_name\": \"GOUEZ-CARDON MANON\", \"beneficiary_dob\": \"12-07-2011\", \"invoice_date\": \"22-05-2024\", \"security_number\": \"276056015912622\", \"invoice_issuer\": \"PHARMACIE DE BREUIL LE VERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Vismed COLLY R/3XY\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 16.04, \"amount\": 19.63}], \"prescripteur_finess_number\": \"600100648\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b8feb7cc-40df-4d9a-b250-a06d519fdd3d_IMG_20240522_181521_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"331744995\", \"rpps_number\": \"10102300802\", \"finess_number\": null, \"doctor_name\": \"MORENO GARCIA BASILIO\", \"total_billed\": 95.29, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": 40.29, \"complementary_coverage\": null, \"client_part\": 55.0, \"remaining_payment\": null, \"insured_name\": \"LAQUECHE JEAN-LOUIS\", \"insured_dob\": \"23-10-1967\", \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"15-05-2024\", \"security_number\": \"1671064483436\", \"invoice_issuer\": \"MORENO GARCIA BASILIO\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"ADI 1\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b5cfbf34-f15b-4f2e-bbe9-e83c86549d98_FACTURE_OPHTALMO_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10107362989\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VEIGA DE LA JARA CARLOS\", \"total_billed\": 61, \"bill_paid\": true, \"amount_paid\": 32.02, \"mandatory_coverage\": 28.98, \"complementary_coverage\": 0, \"client_part\": 32.02, \"remaining_payment\": null, \"insured_name\": \"LARRIEU MARILYS\", \"insured_dob\": null, \"beneficiary_name\": \"LARRIEU MARILYS\", \"beneficiary_dob\": null, \"invoice_date\": \"17-04-2024\", \"security_number\": \"286046443003106\", \"invoice_issuer\": \"BEARN OPHTALMOS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 19.8, \"amount\": 41.67}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 9.18, \"amount\": 19.33}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d3ef8b3d-b78f-4455-b2d6-1c9ef9ec5e60_Quittance_Ophtalmologue_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d3ef8b3d-b78f-4455-b2d6-1c9ef9ec5e60_Quittance_Ophtalmologue_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"171702558\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ANSQUIN MAXIME\", \"total_billed\": 91.68, \"bill_paid\": true, \"amount_paid\": 53.79, \"mandatory_coverage\": 37.89, \"complementary_coverage\": 0, \"client_part\": 53.79, \"remaining_payment\": 0, \"insured_name\": \"CRENIER CHRISTOPHE\", \"insured_dob\": \"27-03-1974\", \"beneficiary_name\": \"CRENIER CHRISTOPHE\", \"beneficiary_dob\": \"27-03-1974\", \"invoice_date\": \"12-03-2024\", \"security_number\": \"1740394038040\", \"invoice_issuer\": \"ANSQUIN MAXIME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 ATM 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 37.89, \"amount\": 91.68}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"171702558\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ANSQUIN MAXIME\", \"total_billed\": 27.32, \"bill_paid\": true, \"amount_paid\": 6.52, \"mandatory_coverage\": 20.8, \"complementary_coverage\": null, \"client_part\": 6.52, \"remaining_payment\": null, \"insured_name\": \"CRENIER CHRISTOPHE\", \"insured_dob\": \"27-03-1974\", \"beneficiary_name\": \"CRENIER CHRISTOPHE\", \"beneficiary_dob\": \"27-03-1974\", \"invoice_date\": \"12-03-2024\", \"security_number\": \"1740394038040\", \"invoice_issuer\": \"ANSQUIN MAXIME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 20.8, \"amount\": 27.32}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0252b5fb-bc84-4fca-bfa7-483ac611885a_Scan_0115_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"10100683647\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"AIFA ABDELOUAHAB\", \"total_billed\": 70.0, \"bill_paid\": true, \"amount_paid\": 29.05, \"mandatory_coverage\": 40.95, \"complementary_coverage\": null, \"client_part\": 29.05, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GIACCHETTO MARTIAL\", \"beneficiary_dob\": null, \"invoice_date\": \"14-05-2024\", \"security_number\": \"156055438203844\", \"invoice_issuer\": \"SELARL DOCTEUR AIFA ABDELOUAHAB\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"14-05-2024\", \"mandatory_coverage\": 28.29, \"amount\": 48.36}, {\"description\": \"ATM\", \"quantity\": null, \"date_of_service\": \"14-05-2024\", \"mandatory_coverage\": 12.66, \"amount\": 21.64}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"171702558\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ANSQUIN MAXIME\", \"total_billed\": 91.68, \"bill_paid\": true, \"amount_paid\": 53.79, \"mandatory_coverage\": 37.89, \"complementary_coverage\": null, \"client_part\": 53.79, \"remaining_payment\": null, \"insured_name\": \"CRENIER CHRISTOPHE\", \"insured_dob\": \"27-03-1974\", \"beneficiary_name\": \"CRENIER CHRISTOPHE\", \"beneficiary_dob\": \"27-03-1974\", \"invoice_date\": \"12-03-2024\", \"security_number\": \"1740394038040\", \"invoice_issuer\": \"ANSQUIN MAXIME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"2 ATM 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 37.89, \"amount\": 91.68}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Ophtalmologie\", \"adeli_number\": \"171702558\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ANSQUIN MAXIME\", \"total_billed\": 27.32, \"bill_paid\": true, \"amount_paid\": 6.52, \"mandatory_coverage\": 20.8, \"complementary_coverage\": null, \"client_part\": 6.52, \"remaining_payment\": null, \"insured_name\": \"CRENIER CHRISTOPHE\", \"insured_dob\": \"27-03-1974\", \"beneficiary_name\": \"CRENIER CHRISTOPHE\", \"beneficiary_dob\": \"27-03-1974\", \"invoice_date\": \"12-03-2024\", \"security_number\": \"1740394038040\", \"invoice_issuer\": \"ANSQUIN MAXIME\", \"currency\": \"EUR\", \"items\": [{\"description\": \"AMY 8\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 20.8, \"amount\": 27.32}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0268b186-6926-46af-b75f-ba01c02cb647_IMG_20240514_152542_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 88.35, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LAPALUS Lucile\", \"beneficiary_dob\": \"25-01-1990\", \"invoice_date\": \"30-09-2022\", \"security_number\": \"290015439530136\", \"invoice_issuer\": \"CENTRE MEDICAL OPHTALMOLOGlQUE POINT VISION LA DEFENSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"BJQP002 BBQP001 AMY 8.5\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 88.35}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ec6baa8c-a189-4850-b310-c90bab372cc2_Facture_Krys_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"921058335\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SCMELA Benjamin\", \"total_billed\": 342.0, \"bill_paid\": true, \"amount_paid\": 137.94, \"mandatory_coverage\": 0.09, \"complementary_coverage\": null, \"client_part\": 341.91, \"remaining_payment\": 204.06, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"John MOLINES\", \"beneficiary_dob\": \"31-01-1990\", \"invoice_date\": \"10-02-2024\", \"security_number\": \"190018519122018\", \"invoice_issuer\": \"LES OPTICIENS KRYS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Design Eyewear Group-189, r. d'Aubervilliers-FR-75018 \", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 192.0}, {\"description\": \"Bbgr-22, Rue de Montmorency-FR-75003 ParisNikon SuperSee AP 1.50 SCC+UV SuperSee AP1.50 Blanc SCC+ UV Origine France Garantie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 75.0}, {\"description\": \"Personnalisation Commande avec verre surmesure\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}, {\"description\": \"OptiSlim Epaisseurs optimis\\u00e9es\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}, {\"description\": \"Bbgr-22, Rue de Montmorency-FR-75003 ParisNikon SuperSee AP 1.50 SCC+UV SuperSee AP1.50 Blanc SCC+ UV Origine France Garantie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 75.0}, {\"description\": \"OptiSlim Epaisseurs optimis\\u00e9es\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b9e93ae5-1490-4f10-97fb-28f8eb89296c_20240513140855738_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b9e93ae5-1490-4f10-97fb-28f8eb89296c_20240513140855738_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "[{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"771043312\", \"rpps_number\": \"10000703362\", \"finess_number\": \"772603817\", \"doctor_name\": \"MARTIN BEUZART Sylvie\", \"total_billed\": 340.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 289.91, \"client_part\": 50.0, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Thomas MELLET\", \"beneficiary_dob\": \"24-11-2009\", \"invoice_date\": \"25-04-2024\", \"security_number\": \"164097511231288\", \"invoice_issuer\": \"Le Collectif des Lunetiers Pomponne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Safilo France Sarl -20-24 Rue Jacques Ibert-FR-92300 Levallois Perret Tommy Hilfiger\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 150.0}, {\"description\": \"Codir - Avenue de Paris - FR - 78550 Bazainville\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 95.0}, {\"description\": \"QUATRO UV Antireflet facile \\u00e0 entretenir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}, {\"description\": \"Codir - Avenue de Paris - FR - 78550 Bazainville\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 95}, {\"description\": \"QUATRO UV Antireflet facile \\u00e0 entretenir\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}], \"prescripteur_finess_number\": null}, {\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": \"771043312\", \"rpps_number\": \"10000703362\", \"finess_number\": \"772603817\", \"doctor_name\": \"MARTIN BEUZART Sylvie\", \"total_billed\": 1.0, \"bill_paid\": true, \"amount_paid\": 1.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Thomas MELLET\", \"beneficiary_dob\": \"24-11-2009\", \"invoice_date\": \"25-04-2024\", \"security_number\": \"164097511231288\", \"invoice_issuer\": \"Le Collectif des Lunetiers Pomponne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Codir - Avenue de Paris - FR - 78550 Bazainville\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 0.5}, {\"description\": \"Codir - Avenue de Paris - FR - 78550 Bazainville\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 0.25}, {\"description\": \"GRIS 85% Verre teint\\u00e9 gris 85%\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}, {\"description\": \"UV PROTECTION 380 Traitement anti UV\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}, {\"description\": \"Codir - Avenue de Paris - FR - 78550 Bazainville\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0.0, \"amount\": 0.25}, {\"description\": \"GRIS 85% Verre teint\\u00e9 gris 85%\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}, {\"description\": \"UV PROTECTION 380 Traitement anti UV\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": 0, \"amount\": null}], \"prescripteur_finess_number\": null}]" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dc4e7302-925d-438f-82d5-e78510d858c5_7447113_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"752617001\", \"doctor_name\": null, \"total_billed\": 55.8, \"bill_paid\": true, \"amount_paid\": 55.8, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"B\\u00e9atrice BRUGIROUX\", \"beneficiary_dob\": null, \"invoice_date\": \"29-04-2024\", \"security_number\": null, \"invoice_issuer\": \"SENSEE SAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Air Optix plus Hydraglyde for Astigmatism (6) (toriques)\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 47.9}, {\"description\": \"OPTI-FREE puremoist 300ml\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 12.9}, {\"description\": \"Frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 5.9}, {\"description\": \"R\\u00e9duction frais de livraison\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -5.9}, {\"description\": \"R\\u00e9duction produit\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": -5.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f482ebef-e885-4627-a927-96ad54ab5aa0_Facture_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f482ebef-e885-4627-a927-96ad54ab5aa0_Facture_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Optique\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": \"402610869\", \"doctor_name\": null, \"total_billed\": 484.0, \"bill_paid\": true, \"amount_paid\": 484.0, \"mandatory_coverage\": 0.09, \"complementary_coverage\": 60.0, \"client_part\": 423.91, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAUNIER Benjamin\", \"beneficiary_dob\": \"19-09-1997\", \"invoice_date\": \"18-04-2024\", \"security_number\": \"1970913155680\", \"invoice_issuer\": \"Vieux Boucau Optique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"APL - WHISTLER HILLS - WHIJOHILLMILL034 - 1 53/17 (1)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 129.0}, {\"description\": \"COD - ORUS 167 SUV AS B-PROTECT_RX - 65\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 147.0}, {\"description\": \"CALISIZE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"B-PROTECT\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"COD - ORUS 167 SUV AS B-PROTECT_RX - 65\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": 0.03, \"amount\": 147.0}, {\"description\": \"CALISIZE\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"B-PROTECT\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"OPA - OWLET ORIGINALS - OWII361 - C93 - 49/24 (2)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 1.0}, {\"description\": \"COD - ORUS 16 SUV SCINTIL_ ST - 70\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}, {\"description\": \"COD - ALTERNANCE UNIF 16 SUV SCINTIL_ST - 65 \", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 30.0}, {\"description\": \"SCINTI\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}, {\"description\": \"DURCI\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": \"401804364\"}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a69ab716-a557-4a04-ad51-df42f8b7afac_Facture-CONSTANT-F2025-638_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"669303075\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Fanny CONSTANT\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Malirach S\\u00e9bastien\", \"beneficiary_dob\": null, \"invoice_date\": \"09-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Fanny CONSTANT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"th\\u00e9rapie Familiale\", \"quantity\": 1, \"date_of_service\": \"09-05-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"th\\u00e9rapie Familiale\", \"quantity\": 1, \"date_of_service\": \"02-05-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9bf58be1-5d5b-4dca-940f-1e65a9bddde3_IMG_9136_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"789315009\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Tobi Nadia\", \"total_billed\": 250.0, \"bill_paid\": true, \"amount_paid\": 250.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laura ANTOINE\", \"beneficiary_dob\": null, \"invoice_date\": \"01-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Tobi Nadia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1er trimestre 2025\", \"quantity\": 1, \"date_of_service\": \"03-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"1er trimestre 2025\", \"quantity\": 1, \"date_of_service\": \"08-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"1er trimestre 2025\", \"quantity\": 1, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"1er trimestre 2025\", \"quantity\": 1, \"date_of_service\": \"22-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"1er trimestre 2025\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ada5466f-7504-47b3-a2ea-6b7a257d3366_Eischen_Guillaume_facture_2025-02-12_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10008894411\", \"finess_number\": null, \"doctor_name\": \"Nathalie Raymond\", \"total_billed\": 630.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EISCHEN\", \"beneficiary_dob\": null, \"invoice_date\": \"12-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Nathalie Raymond\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"09-04-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"07-05-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"20-08-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"03-09-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"01-10-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"07-11-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"17-12-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"14-01-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"s\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b867febf-2383-4d84-ab00-48fc8c9e5873_Facture_acquittee_Clement_BRUNET_Neuropsy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"259304756\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lise BOUILLET\", \"total_billed\": 360, \"bill_paid\": true, \"amount_paid\": 360, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Cl\\u00e9ment BRUNET\", \"beneficiary_dob\": \"13-03-2018\", \"invoice_date\": \"02-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Lise BOUILLET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Efficience intellectuelle\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 200}, {\"description\": \"Evaluations compl\\u00e9mentaires (attention/fonctions ex\\u00e9cutives/psychoaffectif)\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 160}, {\"description\": \"Entretien de restitution du bilan\", \"quantity\": null, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": null}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a59bb8d4-e537-4c26-b14e-d8730ae431c6_Facture_DE-260425_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"779316868\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"V\\u00e9ronique ROUX\", \"total_billed\": 260, \"bill_paid\": true, \"amount_paid\": 260, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Etienne DAUVERGNE\", \"beneficiary_dob\": null, \"invoice_date\": \"05-05-2025\", \"security_number\": null, \"invoice_issuer\": \"V\\u00e9ronique ROUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations \\u00e0 vis\\u00e9 th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"17-01-2025\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"Consultations \\u00e0 vis\\u00e9 th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"14-02-2025\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"Consultations \\u00e0 vis\\u00e9 th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"Consultations \\u00e0 vis\\u00e9 th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"26-04-2025\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3d0dbcb2-40d9-4ea6-a8b0-bd0e6a4e01fa_Facture_mars_et_avril_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10009881979\", \"finess_number\": null, \"doctor_name\": \"Magalie SORLIN\", \"total_billed\": 110.0, \"bill_paid\": true, \"amount_paid\": 110.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHANTELOUVE LESTRAS No\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"07-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Magalie SORLIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1.0, \"date_of_service\": \"10-03-2025\", \"mandatory_coverage\": null, \"amount\": 55.0}, {\"description\": \"Consultation psychologique\", \"quantity\": 1.0, \"date_of_service\": \"07-04-2025\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ba12418d-8275-47fe-95a7-5c5b5d34d8ee_FACTURE_PSY_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"789323276\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandrine BOUSSIOUS\", \"total_billed\": 75.0, \"bill_paid\": true, \"amount_paid\": 75.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAUDEUX Fran\\u00e7ois\", \"beneficiary_dob\": null, \"invoice_date\": \"28-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Sandrine BOUSSIOUS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation mycologique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f567826c-1e71-4182-8a34-85737706378c_Facture_psy_2025_02_27_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"449306893\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne Ruesche\", \"total_billed\": 260, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Manpet B\\u00e9atrice\", \"beneficiary_dob\": null, \"invoice_date\": \"27-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Anne Ruesche\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"13-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"16-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3a89f641-924b-4420-b418-56c4e770479d_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"649309279\", \"rpps_number\": \"10008727850\", \"finess_number\": null, \"doctor_name\": \"AUSSENAC LAURE\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Merlax Denis\", \"beneficiary_dob\": null, \"invoice_date\": \"01-03-2025\", \"security_number\": null, \"invoice_issuer\": \"AUSSENAC LAURE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"01-03-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/52f5edb7-4dae-4fa0-a6c4-4cb89e50ed94_17468070012235815678244917321976_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nadine CATRY-TAILLIEZ\", \"total_billed\": 60, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HENNEBICQUE \\u00c9ric\", \"beneficiary_dob\": null, \"invoice_date\": \"09-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Nadine CATRY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"21-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"11-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"22-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"consultation\", \"quantity\": 1, \"date_of_service\": \"09-05-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_rotated\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/13612e18-44fa-4474-98f0-efa3e301d513_Facture_Rouat_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"849308416\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DOERFLINGER Estelle\", \"total_billed\": 150, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rouat Coralie\", \"beneficiary_dob\": null, \"invoice_date\": \"07-03-2025\", \"security_number\": null, \"invoice_issuer\": \"DOERFLINGER Estelle\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"07-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e9e55605-6b6e-4037-afc1-3d878569b0cd_Facture_Dr_Ropars_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10008885799\", \"finess_number\": null, \"doctor_name\": \"Chantal ROPARS\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOIVIN Claire\", \"beneficiary_dob\": null, \"invoice_date\": \"14-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Chantal ROPARS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soins de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"14-05-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0ecc80c9-51cf-4813-a8b9-b5d4be9097a3_17473133332224237954771730089694_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"959300427\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DAHAN Eva\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FAIVRE Louis\", \"beneficiary_dob\": null, \"invoice_date\": \"14-05-2025\", \"security_number\": null, \"invoice_issuer\": \"DAHAN Eva\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"14-05-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/841df60b-7e6e-43f0-ba01-5f70339e2160_20250521_125710_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"959321555\", \"rpps_number\": \"10009030445\", \"finess_number\": null, \"doctor_name\": \"Laurence Touitou\", \"total_billed\": 70, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vincent Jombat\", \"beneficiary_dob\": null, \"invoice_date\": \"20-05-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET DE PSYCHOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"20-05-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/03cf6201-9575-4f3f-8048-2e26340accd1_Screenshot_20250305_110300_M365_Copilot_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"499305951\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marion SAIDI-BLANCHARD\", \"total_billed\": 180, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BOUCHER Patrice\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Marion SAIDI-BLANCHARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"03-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"20-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/aea24a8a-7340-404b-b050-211c1379bcd8_Thevon_Rudy_facture_2025-03-13_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759328073\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ariane L\\u00e9vy\", \"total_billed\": 160, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Thevon Rudy\", \"beneficiary_dob\": null, \"invoice_date\": \"13-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Ariane L\\u00e9vy psychologue clinicienne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances de Thevon Rudy\", \"quantity\": 1, \"date_of_service\": \"07-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"s\\u00e9ances de Thevon Rudy\", \"quantity\": 1, \"date_of_service\": \"13-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/24b074c3-c00b-4079-803c-8f48775e0734_facture_psy_17_fevrier_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"959321555\", \"rpps_number\": \"10009030445\", \"finess_number\": null, \"doctor_name\": \"Laurence Touitou\", \"total_billed\": 150, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emilie JOMBART\", \"beneficiary_dob\": null, \"invoice_date\": \"17-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET DE PSYCHOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 75}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"17-02-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d6beac48-be51-437f-b0c3-9c0353817f27_FACTURE_PSYCHO_17-02-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339316226\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Pauline CONDON\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SCHULLER Myriam\", \"beneficiary_dob\": null, \"invoice_date\": \"17-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Pauline CONDON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"17-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce4aa309-f005-4307-81fb-537871e79ea9_20250217202224_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"769302241\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Delphine LEBLANC\", \"total_billed\": 120, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Didier CAP\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Delphine LEBLANC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2e16e441-5e10-48ea-93e9-42903aa9f270_facture_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"599304854\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire Laterrade\", \"total_billed\": 140, \"bill_paid\": true, \"amount_paid\": 140, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"S\\u00e9bastien ROUSSEAU\", \"beneficiary_dob\": null, \"invoice_date\": \"06-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Claire Laterrade\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"1 consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"06-03-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/acbda15f-944a-4ab4-9ce1-4088e4ad4703_emeline_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"779313550\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Champion de Brito Sylviane\", \"total_billed\": 100, \"bill_paid\": true, \"amount_paid\": 100, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emeline VITU\", \"beneficiary_dob\": null, \"invoice_date\": \"21-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Champion de Brito Sylviane\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION DE PSYCHOLOGIE\", \"quantity\": 1, \"date_of_service\": \"13-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"CONSULTATION DE PSYCHOLOGIE\", \"quantity\": 1, \"date_of_service\": \"21-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7defe730-5720-478d-bde4-d5dc33573854_Note_dhonoraires_Susie_MARTELOT_120325_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"859305849\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne de Bonneval\", \"total_billed\": 45, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Susie MARTELOT\", \"beneficiary_dob\": \"01-05-2005\", \"invoice_date\": \"12-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Anne de Bonneval\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-03-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/943cd21b-42cc-48b9-87ac-8caf3f599137_20250310181906_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"479302903\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandrine Chiaradia\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 20, \"mandatory_coverage\": 30, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"M. Dumain\", \"beneficiary_dob\": null, \"invoice_date\": \"27-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Sandrine Chiaradia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/34d942a5-b3b9-4432-8c0f-286b519ed010_Screenshot_20250606-165450_Yahoo_Mail_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"479304537\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Maude ORTEGA\", \"total_billed\": 100, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Amandine Miquel\", \"beneficiary_dob\": null, \"invoice_date\": \"06-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Maude ORTEGA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"27-05-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"06-06-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a0a48232-f424-4e22-b333-753dad33e987_Facture-MATHIEU_JUDITH-PASCAL-LAULERGUE_Christelle-F-2025-041267_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHRISTELLE PASCAL-LAULERGUE\", \"total_billed\": 325.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MATHIEU JUDITH\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2025\", \"security_number\": null, \"invoice_issuer\": \"CHRISTELLE PASCAL-LAULERGUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"29-01-2024\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"26-02-2024\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"27-02-2024\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"06-03-2024\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"13-03-2024\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d444da87-4359-4515-8045-a6d47e125598_Facture_psychologue_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d444da87-4359-4515-8045-a6d47e125598_Facture_psychologue_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"929319564\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne GAUVENET\", \"total_billed\": 240, \"bill_paid\": true, \"amount_paid\": 240, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"AUBIGNAC Liliane\", \"beneficiary_dob\": null, \"invoice_date\": \"16-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Anne GAUVENET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"05-12-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"12-12-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"16-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0ef1eba8-dc8b-40ad-9916-a49cb7854820_IMG_6116_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"699337820\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claudio JOFR\\u00c9\", \"total_billed\": 240, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mael DOUBLECOURT\", \"beneficiary_dob\": null, \"invoice_date\": \"24-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Claudio JOFR\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances psychologiques\", \"quantity\": 1, \"date_of_service\": \"24-09-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances psychologiques\", \"quantity\": 1, \"date_of_service\": \"21-10-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances psychologiques\", \"quantity\": 1, \"date_of_service\": \"04-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances psychologiques\", \"quantity\": 1, \"date_of_service\": \"03-12-2024\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ff6669a9-47fa-444f-a3d0-6afeb6f4e1d4_391DB09B-BD62-4605-B1BA-A8CF31B97424_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"719304909\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"France Coissard\", \"total_billed\": 480.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"De Araujo Gregory\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychologie du travail et des Organisations Sp\\u00e9cialis\\u00e9 dans les Risques Psychosociaux et la Qualit\\u00e9 de Vie au Travail\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"14-01-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"05-08-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"04-07-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"03-06-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"30-03-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"21-03-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"19-02-2024\", \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e6528824-c26c-420c-93b5-76c2db6dd370_Mon_scan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"019301001\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DUCHAS CATHERINE\", \"total_billed\": 58.0, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Desvignes Arnaud\", \"beneficiary_dob\": null, \"invoice_date\": \"04-06-2025\", \"security_number\": null, \"invoice_issuer\": \"DUCHAS CATHERINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-06-2025\", \"mandatory_coverage\": null, \"amount\": 58.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fd93157f-d003-42c5-bc78-bd1c3bc8f481_F20253-00243_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"139309587\", \"rpps_number\": \"10009986489\", \"finess_number\": null, \"doctor_name\": \"Alexandrine BERNARD\", \"total_billed\": 80.0, \"bill_paid\": true, \"amount_paid\": 80.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BONO Loris\", \"beneficiary_dob\": null, \"invoice_date\": \"22-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Alexandrine BERNARD - Psychologue (EI)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance psychologie EMDR\", \"quantity\": 1, \"date_of_service\": \"22-03-2025\", \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ff830baf-b846-4e4f-8ab0-a41e4ade00d8_IMG_0479_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339309569\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sol\\u00e8ne TAILL\\u00c9\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Xavier HARISMENDY\", \"beneficiary_dob\": null, \"invoice_date\": \"24-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Sol\\u00e8ne TAILL\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Tarif Consultation pr\\u00e9sentielle\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c5d2e602-7353-4b21-84d4-3ee1802bd791_psyLeopoldmars25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"429301195\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Wycka\\u00ebrt Blandine\", \"total_billed\": 70, \"bill_paid\": true, \"amount_paid\": 70, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Leopold COUTURIER\", \"beneficiary_dob\": null, \"invoice_date\": \"12-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Wycka\\u00ebrt Blandine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-03-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9cc705c6-03e3-435c-8510-55f47c8058e3_Psy_250504_220141_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"949315535\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"VERONIQUE LE MEUR\", \"total_billed\": 120.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"THIBAUD REA\", \"beneficiary_dob\": null, \"invoice_date\": \"01-05-2025\", \"security_number\": null, \"invoice_issuer\": \"VERONIQUE LE MEUR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Psychoth\\u00e9rapie individuelle\", \"quantity\": 1.0, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Psychoth\\u00e9rapie individuelle\", \"quantity\": 1.0, \"date_of_service\": \"16-04-2015\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4fdec999-0763-4c63-a5ef-c8ea7fd51865_Facture_psychologue_avril_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"689305969\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anna POLIAKOW-HAMLAT\", \"total_billed\": 100.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHRISTINE LUYE TANET\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Anna POLIAKOW-HAMLAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de supervision de pratique\", \"quantity\": 1, \"date_of_service\": \"15-04-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"S\\u00e9ance de supervision de pratique\", \"quantity\": 1, \"date_of_service\": \"29-04-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b56d0f30-2cf9-4863-b31a-014a62681227_Facture_psychologie_Anaelle_CROCHET_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"279302053\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Oph\\u00e9lie Elorin-Delaunay\", \"total_billed\": 80.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CROCHET Ma\\u00eblle\", \"beneficiary_dob\": null, \"invoice_date\": \"22-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Oph\\u00e9lie Elorin-Delaunay\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 80.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4eb5bfa7-5d56-42f3-bbce-ff777b38e949_Facture_Hugues_VLAMYNCK_2023__202_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"789319977\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Antony PEREIRA\", \"total_billed\": 500, \"bill_paid\": true, \"amount_paid\": 500, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Hugues Vlamynck\", \"beneficiary_dob\": null, \"invoice_date\": \"22-11-2023\", \"security_number\": null, \"invoice_issuer\": \"Antony PEREIRA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Bilan neuropsychologique\", \"quantity\": 1, \"date_of_service\": \"07-11-2023\", \"mandatory_coverage\": null, \"amount\": 250}, {\"description\": \"Bilan neuropsychologique\", \"quantity\": 1, \"date_of_service\": \"15-11-2023\", \"mandatory_coverage\": null, \"amount\": 250}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a1e65e24-78cb-4f60-820b-b9f3b29cd449_Facture_psy_Baptiste_CHAUDERLOT_-_17.06.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": 37.8, \"bill_paid\": true, \"amount_paid\": 11.34, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": \"CHAUDERLOT THOMAS\", \"insured_dob\": null, \"beneficiary_name\": \"CHAUDERLOT Lea\", \"beneficiary_dob\": \"26-01-2015\", \"invoice_date\": \"18-06-2025\", \"security_number\": \"183072B03309550\", \"invoice_issuer\": \"Centre d'Imagerie M\\u00e9dicale Gardanne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"ADE\", \"quantity\": null, \"date_of_service\": \"18-06-2025\", \"mandatory_coverage\": null, \"amount\": 37.8}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fd4f69a7-1d6a-4b44-a0ee-9764ceda50f7_PaiementTiers25032025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/51c2643d-42a2-44ec-8050-66dffe4bf544_Scan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"769308107\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurence ROIG\", \"total_billed\": 55, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Deloigue Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"23-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Madame Laurence ROIG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"28-03-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/28430340-f740-47df-97e3-08d21894dc8e_IMG_20250430_204132_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"319306585\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"FOUCAUDEAU Marie-Lise\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 160.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PARY-TERRIEN Giullia\", \"beneficiary_dob\": null, \"invoice_date\": \"26-03-2025\", \"security_number\": null, \"invoice_issuer\": \"FOUCAUDEAU Marie-Lise\", \"currency\": \"EUR\", \"items\": [{\"description\": \"suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"22-01-2025\", \"mandatory_coverage\": null, \"amount\": 40.0}, {\"description\": \"suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": 40.0}, {\"description\": \"suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"12-03-2025\", \"mandatory_coverage\": null, \"amount\": 40.0}, {\"description\": \"suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"26-03-2025\", \"mandatory_coverage\": null, \"amount\": 40.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b3bcea45-e22e-4b81-8f7b-a7597488f62a_1748944762742_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"509302261\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Matthieu ROBIN\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Julien Busselet-Nanguet\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Matthieu ROBIN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances Psychologue\", \"quantity\": 1, \"date_of_service\": \"26-03-2025\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"S\\u00e9ances Psychologue\", \"quantity\": 1, \"date_of_service\": \"16-04-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9832e73b-4ea3-4070-a6b8-24338e943f59_Lego_Alicia_facture_2025-03-11_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10009036269\", \"finess_number\": null, \"doctor_name\": \"Christine Clavert\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alicia LEGO\", \"beneficiary_dob\": null, \"invoice_date\": \"11-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Christine Clavert El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"04-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"11-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f90254bd-ff91-4601-bfad-eae9c815a07e_Facture_PSY_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"9A9301325\", \"rpps_number\": \"10008801960\", \"finess_number\": null, \"doctor_name\": \"JERSIER Jessie\", \"total_billed\": 300, \"bill_paid\": true, \"amount_paid\": 300, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"HALLPIKE Ingrid\", \"beneficiary_dob\": null, \"invoice_date\": \"02-05-2025\", \"security_number\": null, \"invoice_issuer\": \"JERSIER Jessie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-10-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultations de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"11-10-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultations de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"31-10-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultations de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"25-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultations de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a191069f-3b59-4658-9cb4-a9af39e37f76_PXL_20250514_073538098_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"559301890\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille LEICK\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivia GIURLANDO\", \"beneficiary_dob\": \"07-02-2003\", \"invoice_date\": \"13-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Camille LEICK\", \"currency\": \"SE\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"13-05-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/048978c3-c76f-4729-8c87-9f07922b6af4_FACTURE_2025_-_AlexisV754_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"389318916\", \"rpps_number\": \"10009873992\", \"finess_number\": null, \"doctor_name\": \"Marion TERMOZ-MASSON\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alexis VALLELIAN\", \"beneficiary_dob\": null, \"invoice_date\": \"15-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Marion TERMOZ-MASSON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychoth\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"13-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f760f990-d2d4-4d1b-ba52-4a4a24bbe598_20250518185704_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"769302241\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Delphine LEBLANC\", \"total_billed\": 127, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Louis Ferdinand CAP\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Delphine LEBLANC\", \"currency\": \"EUR\", \"items\": [{\"description\": \"PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"22-04-2025\", \"mandatory_coverage\": null, \"amount\": 63.5}, {\"description\": \"PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"28-04-2025\", \"mandatory_coverage\": null, \"amount\": 63.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ea7fb5d8-95d6-4c32-baaa-bbd0a112a1fd_Facture_psy_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Audrey Vettes\", \"total_billed\": 350, \"bill_paid\": true, \"amount_paid\": 350, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"H\\u00e9l\\u00e8ne Vergne\", \"beneficiary_dob\": null, \"invoice_date\": \"14-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Audrey Vettes\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"14-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-03-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"13-03-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"14-04-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/10a4f816-60b4-45e0-8e53-150316c605b0_facture_EMDR_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"069324150\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anais MALLEA\", \"total_billed\": 210.0, \"bill_paid\": true, \"amount_paid\": 210.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GERMANY Joelle\", \"beneficiary_dob\": null, \"invoice_date\": \"12-03-2025\", \"security_number\": null, \"invoice_issuer\": \"El Anais MALLEA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie / psychoth\\u00e9rapie\", \"quantity\": 1.0, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation de psychologie / psychoth\\u00e9rapie\", \"quantity\": 1.0, \"date_of_service\": \"21-02-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation de psychologie / psychoth\\u00e9rapie\", \"quantity\": 1.0, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0c764b95-eeb1-45e6-82ee-22364a491ce6_1222024_jacquemart_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"599319522\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vandycke Aur\\u00e9lie\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jacquenart David\", \"beneficiary_dob\": null, \"invoice_date\": \"14-08-2024\", \"security_number\": null, \"invoice_issuer\": \"Vandycke Aur\\u00e9lie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation individuelle\", \"quantity\": 1, \"date_of_service\": \"01-08-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/26ea618c-f4b3-457a-8f1b-c64cbe533324_doc01400620250214144333_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"959314717\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurence Vignali\", \"total_billed\": 70, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lola SUROT\", \"beneficiary_dob\": \"09-04-2021\", \"invoice_date\": \"27-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Laurence Vignali\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance d'entretien psychologique pour l'enfant\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null, \"is_rotated\": true, \"is_handwriting\": true}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e581c241-0645-456e-965d-c575448c5504_IMG_0855_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339309569\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sol\\u00e8ne TAILL\\u00c9\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Xavier HARISMENDY\", \"beneficiary_dob\": null, \"invoice_date\": \"27-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Sol\\u00e8ne TAILL\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Tarif Consultation pr\\u00e9sentielle\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c1322e70-753a-4fb2-b2d8-6556ee40e4c3_Facture_acquittee_Mme_DEPLANCHE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"219304870\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elodie DUCREUX\", \"total_billed\": 240.0, \"bill_paid\": true, \"amount_paid\": 240.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Margaux DEPLANCHE\", \"beneficiary_dob\": null, \"invoice_date\": \"10-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Point m\\u00e9ducal - Rond-point de la Nation\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien psychologique\", \"quantity\": 1, \"date_of_service\": \"07-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretien psychologique\", \"quantity\": 1, \"date_of_service\": \"14-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretien psychologique\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretien psychologique\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6f578c6b-691e-4dba-8def-6d2be88550ca_IMG_7126_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"699310694\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"G\\u00e9raldine MOULIN\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Loubna ROUIBAH\", \"beneficiary_dob\": null, \"invoice_date\": \"03-06-2025\", \"security_number\": null, \"invoice_issuer\": \"G\\u00e9raline MOULIN (-EI-)\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"16-05-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"28-05-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/145cf4a4-e4dd-47b5-a911-aa9797e6d8f1_Facture_2025_06_02_02_NOEL_Claire_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10109962778\", \"finess_number\": null, \"doctor_name\": \"Marine Den\\u00e9c\\u00e9\", \"total_billed\": 450.0, \"bill_paid\": true, \"amount_paid\": 450.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Claire NO\\u00cbL\", \"beneficiary_dob\": null, \"invoice_date\": \"02-06-2025\", \"security_number\": null, \"invoice_issuer\": \"CABINET DE NEUROPSYCHOLOGIE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"30-04-2015\", \"mandatory_coverage\": null, \"amount\": 75.0}, {\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"07-05-2025\", \"mandatory_coverage\": null, \"amount\": 75.0}, {\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"14-05-2025\", \"mandatory_coverage\": null, \"amount\": 75.0}, {\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"21-05-2025\", \"mandatory_coverage\": null, \"amount\": 75.0}, {\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"28-05-2025\", \"mandatory_coverage\": null, \"amount\": 75.0}, {\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"02-06-2025\", \"mandatory_coverage\": null, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/da67a6c4-5de0-494d-9854-fae189c21198_Attestation_mutuelle_M.LUCAS_Julien_03-25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339302606\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-Line RENARD\", \"total_billed\": 100, \"bill_paid\": true, \"amount_paid\": 100, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LUCAS Julien\", \"beneficiary_dob\": \"27-09-1986\", \"invoice_date\": \"28-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychologie & de Sant\\u00e9 Mentale au Travail\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation individuelle de souffrance au travail\", \"quantity\": 1, \"date_of_service\": \"20-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"consultation individuelle de souffrance au travail\", \"quantity\": 1, \"date_of_service\": \"28-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/92f29254-bbd5-41df-bba2-82349d3bfc03_Facture_Julia_Mai_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10009666727\", \"finess_number\": null, \"doctor_name\": \"Nancy Rodriguez\", \"total_billed\": 48, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Julia MARTINEZ\", \"beneficiary_dob\": null, \"invoice_date\": \"16-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychologie Nancy Rodriguez E.I.\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation individuelle\", \"quantity\": 1, \"date_of_service\": \"07-05-2025\", \"mandatory_coverage\": null, \"amount\": 48}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d9bf98e4-e6f7-4a51-8b4c-12a7380435cc_note_dhonoraire__4_2_2_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10009996579008\", \"finess_number\": null, \"doctor_name\": \"Aliz\\u00e9e VIGIER\", \"total_billed\": 250, \"bill_paid\": true, \"amount_paid\": 250, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Maevy Stephan\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Aliz\\u00e9e VIGIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations \\u00e0 suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"06-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultations \\u00e0 suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"13-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultations \\u00e0 suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"20-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultations \\u00e0 suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultations \\u00e0 suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a38bf59e-df27-4165-ae75-15cc23151e5d_Factures_2024_-_B_Mattias_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"349317537\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"C\\u00e9cile Hill\", \"total_billed\": 550.0, \"bill_paid\": true, \"amount_paid\": 550.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Mattias Barbezier\", \"beneficiary_dob\": null, \"invoice_date\": \"03-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Mme C\\u00e9cile Hill\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"18-06-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"24-06-2024\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"15-07-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"23-08-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"16-09-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"14-10-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"18-11-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Consultation Individuelle en pr\\u00e9sentiel\", \"quantity\": 1, \"date_of_service\": \"16-12-2024\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0e94942e-4291-4707-ba05-41ce53fca855_Facture_acquittee_Margot_MARTINEZ_2025_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0e94942e-4291-4707-ba05-41ce53fca855_Facture_acquittee_Margot_MARTINEZ_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"139325351\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Carole RODON\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARTINEZ Margot\", \"beneficiary_dob\": null, \"invoice_date\": \"02-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de psychologie Rodon-Guesdon\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie - psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"21-05-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Consultation de psychologie - psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"23-04-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5147af98-38c2-4776-99ce-8ada661e86f4_2025-04-15_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"029304144\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Richard ROKICKI\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GASPARIK \\u00c9milie\", \"beneficiary_dob\": null, \"invoice_date\": \"15-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Richard ROKICKI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"15-04-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/000b40b5-ae2b-45bb-9cf7-c597effd058f_Facture_seances_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"949305775\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Catherine FINKEL\", \"total_billed\": 350, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"URVOY Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"04-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Catherine FINKEL\", \"currency\": \"EUR\", \"items\": [{\"description\": \"entretiens psychologiques\", \"quantity\": 1, \"date_of_service\": \"06-11-2024\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"entretiens psychologiques\", \"quantity\": 1, \"date_of_service\": \"20-11-2024\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"entretiens psychologiques\", \"quantity\": 1, \"date_of_service\": \"11-12-2024\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"entretiens psychologiques\", \"quantity\": 1, \"date_of_service\": \"28-12-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"entretiens psychologiques\", \"quantity\": 1, \"date_of_service\": \"03-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"entretiens psychologiques\", \"quantity\": 1, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"entretiens psychologiques\", \"quantity\": 1, \"date_of_service\": \"29-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ce8de90c-de04-4d58-b48b-cde62895e08e_2025-03-01_141722_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"959316175\", \"rpps_number\": \"10008484551\", \"finess_number\": null, \"doctor_name\": \"Agn\\u00e8s GUILLET DE LA BROSSE\", \"total_billed\": 420, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Sylvain Levy-Renault\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": \"190099517603229\", \"invoice_issuer\": \"Agn\\u00e8s GUILLET DE LA BROSSE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Note d'honoraires\", \"quantity\": 1, \"date_of_service\": \"07-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Note d'honoraires\", \"quantity\": 1, \"date_of_service\": \"14-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Note d'honoraires\", \"quantity\": 1, \"date_of_service\": \"21-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Note d'honoraires\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Note d'honoraires\", \"quantity\": 1, \"date_of_service\": \"18-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Note d'honoraires\", \"quantity\": 1, \"date_of_service\": \"28-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d986e1c7-bb83-49ed-9784-27a9d96a7ecd_Facture_psychologue__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"689305969\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anna POLIAKOW-HAMLAT\", \"total_billed\": 150.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CHRISTINE LUYE TANET\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Anna POLIAKOW-HAMLAT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de supervision de pratique\", \"quantity\": 1, \"date_of_service\": \"12-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"S\\u00e9ance de supervision de pratique\", \"quantity\": 1, \"date_of_service\": \"11-03-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"S\\u00e9ance de supervision de pratique\", \"quantity\": 1, \"date_of_service\": \"25-03-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2b8bd1cb-a43a-41e2-bf5c-5abeae34806e_Thevon_Rudy_facture_2025-04-04_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759328073\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ariane L\\u00e9vy\", \"total_billed\": 240, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Thevon Rudy\", \"beneficiary_dob\": null, \"invoice_date\": \"04-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Ariane L\\u00e9vy psychologue clinicienne\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"19-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"25-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"s\\u00e9ances de psychologie\", \"quantity\": 1, \"date_of_service\": \"01-04-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/04f67ff1-82e2-4ea6-81ce-0f338ec5c503_downloadfile_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"929345700\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elisa BELL\\u00c9GO\", \"total_billed\": 130, \"bill_paid\": true, \"amount_paid\": 130, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CILETTI Caroline\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Elisa BELL\\u00c9GO - PSYCHOLOGUE CLINICIENNE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-01-2025\", \"mandatory_coverage\": null, \"amount\": 65.0}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 65.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6b01cea0-09da-45ae-bb62-3d2f320969cd_Facture_psy_marie_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"591130729\", \"rpps_number\": \"10002241379\", \"finess_number\": null, \"doctor_name\": \"Bruno Francq\", \"total_billed\": null, \"bill_paid\": true, \"amount_paid\": 53.74, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marie Hyette\", \"beneficiary_dob\": null, \"invoice_date\": \"06-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Docteur Bruno Franco\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 55.0}, {\"description\": \"1 consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"06-05-2025\", \"mandatory_coverage\": null, \"amount\": 55.0}, {\"description\": \"ALQP003\", \"quantity\": 1, \"date_of_service\": \"18-03-2025\", \"mandatory_coverage\": null, \"amount\": 69.12}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f019e2bd-699e-4f71-a900-f0d6c8798953_Facture_psychologue_2_seances_2025_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f019e2bd-699e-4f71-a900-f0d6c8798953_Facture_psychologue_2_seances_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"929319564\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne GAUVENET\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Aubignac Liliane\", \"beneficiary_dob\": null, \"invoice_date\": \"07-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Anne GAUVENET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"03-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"10-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a04fac7a-eebb-4111-8222-5e80a14b67aa_facture_RM_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759339724\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emmanuel FARAUS\", \"total_billed\": 540.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Romain Mercier\", \"beneficiary_dob\": null, \"invoice_date\": \"19-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Emmanuel FARAUS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-12-2024\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"19-12-2024\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"06-02-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultation de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"19-03-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/56192f72-666e-4749-900b-ed02aaf1fdd6_facturevgirard_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"9D9306617\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Grauwin Lo\\u00efc\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vincent Girard\", \"beneficiary_dob\": null, \"invoice_date\": \"10-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Grauwin Lo\\u00efc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"01-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dc98cfbb-a903-437b-9b79-f409318ac025_20250322_161439_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"319314472\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne-Sophie ELBEZE\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lagrange Adina\", \"beneficiary_dob\": null, \"invoice_date\": \"20-03-2025\", \"security_number\": \"295016015939420\", \"invoice_issuer\": \"Anne-Sophie ELBEZE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"20-03-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5fe375b6-ac9f-47a2-bd89-ac79f9035da1_FACTURE_RIGAL_Lomane_12_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"409305182\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Lisa BORDE\", \"total_billed\": 150, \"bill_paid\": true, \"amount_paid\": 150, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RIGAL Lomane\", \"beneficiary_dob\": null, \"invoice_date\": \"19-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Consultations psychologiques & psychoth\\u00e9rapeutiques\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"07-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"14-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"19-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/00f158df-d31b-4292-8d57-1b0349d3494a_psy2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"649309279\", \"rpps_number\": \"10008727850\", \"finess_number\": null, \"doctor_name\": \"AUSSENAC Laure\", \"total_billed\": 55.0, \"bill_paid\": true, \"amount_paid\": 55.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Merlax Denis\", \"beneficiary_dob\": null, \"invoice_date\": \"15-02-2025\", \"security_number\": null, \"invoice_issuer\": \"AUSSENAC Laure\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"15-02-2025\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c08a45e9-ea1b-4d5e-9c6a-d141a930555a_Facture_psychiatre__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychiatrie\", \"adeli_number\": null, \"rpps_number\": \"10102034872\", \"finess_number\": null, \"doctor_name\": \"ESTELLE COURANT\", \"total_billed\": 150.0, \"bill_paid\": true, \"amount_paid\": 150.0, \"mandatory_coverage\": 29.75, \"complementary_coverage\": 0.0, \"client_part\": 120.25, \"remaining_payment\": 0.0, \"insured_name\": \"MAURAND MARIE\", \"insured_dob\": \"19-04-2000\", \"beneficiary_name\": \"MAURAND MARIE\", \"beneficiary_dob\": \"19-04-2000\", \"invoice_date\": \"05-05-2025\", \"security_number\": \"2000475117523\", \"invoice_issuer\": \"ESTELLE COURANT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CP\", \"quantity\": null, \"date_of_service\": \"05-05-2025\", \"mandatory_coverage\": 29.75, \"amount\": 150.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/10bf2fe2-31c6-45a2-9c08-2b826fd1ab14_20250411190619_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"479302903\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandrine Chiaradia\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 20, \"mandatory_coverage\": 30, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DUMAIN Nathalie\", \"beneficiary_dob\": null, \"invoice_date\": \"10-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Sandrine Chiaradia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"10-04-2025\", \"mandatory_coverage\": 30, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/96d6b4c9-cb52-4e48-90b8-7a0789d77809_2024-12-09_GP_merged_1_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/96d6b4c9-cb52-4e48-90b8-7a0789d77809_2024-12-09_GP_merged_2_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"229303466\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie TRICOIRE\", \"total_billed\": 35, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vanessa CHALON \", \"beneficiary_dob\": null, \"invoice_date\": \"09-12-2024\", \"security_number\": \"283079712023960\", \"invoice_issuer\": \"Centre de Psychotrauma Rennais\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de groupe de parole\", \"quantity\": 1, \"date_of_service\": \"09-12-2024\", \"mandatory_coverage\": null, \"amount\": 35}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/42d5ec9a-3594-4d64-9f24-31573fd89736_Chloee_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"769302365\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christine ENOUX\", \"total_billed\": 125.0, \"bill_paid\": true, \"amount_paid\": 125.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Chlo\\u00e9 Floch\", \"beneficiary_dob\": \"06-10-2001\", \"invoice_date\": \"22-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Christine ENOUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": null, \"amount\": 62.5}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"22-03-2025\", \"mandatory_coverage\": null, \"amount\": 62.5}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/106f9f9a-c660-42f2-aa70-8ced525d2fdb_Rayan_Zyan_03.25_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"069315851\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CZARIK ELEONORE\", \"total_billed\": 260, \"bill_paid\": true, \"amount_paid\": 260, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Rayan ZYANE\", \"beneficiary_dob\": \"17-10-2012\", \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"CZARIK ELEONORE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"01-03-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"08-03-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"15-03-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"consultations psychologiques\", \"quantity\": 1, \"date_of_service\": \"22-03-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3d554fa9-0f1e-4169-b0a4-fa3a8c1869c3_facture_Maelle_S._1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"389312737\", \"rpps_number\": \"10009828533\", \"finess_number\": null, \"doctor_name\": \"Yannick HIMBER\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SAULCE Ma\\u00eblle\", \"beneficiary_dob\": null, \"invoice_date\": \"28-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Yannick HIMBER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"suivi psychologique\", \"quantity\": 2, \"date_of_service\": \"27-05-2025\", \"mandatory_coverage\": null, \"amount\": 120}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e63f7d6a-09a2-4b13-8b97-60084a475756_facture_2025_-_46_Charlotte_Pelat__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759357197\", \"rpps_number\": \"10008956947\", \"siret\": \"45323712500050\", \"doctor_name\": \"Agn\\u00e8s Falabr\\u00e8gues\", \"total_billed\": 270.0, \"bill_paid\": true, \"amount_paid\": 270.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Charlotte Pelat\", \"beneficiary_dob\": null, \"invoice_date\": \"03-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Agn\\u00e8s Falabr\\u00e8gues\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations 1h\", \"quantity\": 1, \"date_of_service\": \"07-04-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultations 1h\", \"quantity\": 1, \"date_of_service\": \"15-04-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultations 1h\", \"quantity\": 1, \"date_of_service\": \"30-04-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/574d06fb-08d4-4360-a02b-dc9926aaefa5_20250430_092611_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"9A9300822\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Nadia GOB\", \"total_billed\": 280.0, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vanessa PASCAL\", \"beneficiary_dob\": null, \"invoice_date\": \"29-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Nadia GOB\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Psychoth\\u00e9rapie 4 s\\u00e9ances\", \"quantity\": 1, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Psychoth\\u00e9rapie 4 s\\u00e9ances\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Psychoth\\u00e9rapie 4 s\\u00e9ances\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"Psychoth\\u00e9rapie 4 s\\u00e9ances\", \"quantity\": 1, \"date_of_service\": \"19-03-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/945ea559-809a-4186-a94d-ea319fb29c19_Facture_consultation_Alexanne_Letourneur_Fabre_fev_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"069316685\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"La\\u00ebtitia Behir\", \"total_billed\": 120, \"bill_paid\": null, \"amount_paid\": false, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Alexanne Letourneur Fabre\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"La\\u00ebtitia Behir\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5b90c5eb-6a25-4c20-986c-a94e327a3bfb_Ramonet_Louise_facture_2025-05-21_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10008894411\", \"finess_number\": null, \"doctor_name\": \"Nathalie Raymond\", \"total_billed\": 200, \"bill_paid\": true, \"amount_paid\": 200, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"RAMONET Louise\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Nathalie Raymond\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie et de psychologie\", \"quantity\": 1, \"date_of_service\": \"25-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie et de psychologie\", \"quantity\": 1, \"date_of_service\": \"11-03-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie et de psychologie\", \"quantity\": 1, \"date_of_service\": \"01-04-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie et de psychologie\", \"quantity\": 1, \"date_of_service\": \"06-05-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f36e5737-a4c7-4231-aa1b-9a30251a12eb_Facture_psychologue_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"359309636\", \"rpps_number\": \"10009527176\", \"finess_number\": null, \"doctor_name\": \"Kristelle Douillet\", \"total_billed\": 135, \"bill_paid\": true, \"amount_paid\": 135, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jegou Corentin\", \"beneficiary_dob\": null, \"invoice_date\": \"24-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Kristelle Douillet\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances\", \"quantity\": 1, \"date_of_service\": \"24-04-2025\", \"mandatory_coverage\": null, \"amount\": 45}, {\"description\": \"S\\u00e9ances\", \"quantity\": 1, \"date_of_service\": \"27-03-2025\", \"mandatory_coverage\": null, \"amount\": 45}, {\"description\": \"S\\u00e9ances\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/700c7771-82fd-4cc1-b8dc-0fc051d927cf_Facture_140e_Psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"779319847\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Vicente Cindy\", \"total_billed\": 140, \"bill_paid\": true, \"amount_paid\": 140, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lucas FUNCK\", \"beneficiary_dob\": null, \"invoice_date\": \"13-06-2025\", \"security_number\": null, \"invoice_issuer\": \"CENTRE MEDICAL DES SAPINS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"06-06-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"13-06-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9a208af0-46b9-44ca-89a8-2ee549194ecb_Mon_scan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"019301001\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Duchas Catherine\", \"total_billed\": 58, \"bill_paid\": true, \"amount_paid\": 58, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Desvignes Arnaud\", \"beneficiary_dob\": null, \"invoice_date\": \"21-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Duchas Catherine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 entretiens ce jour\", \"quantity\": 1, \"date_of_service\": \"21-03-2025\", \"mandatory_coverage\": null, \"amount\": 58}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f6076bbd-eb41-4341-9ea6-4d00e704a64f_IMG20250315150144_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759370844\", \"rpps_number\": \"10008985722\", \"finess_number\": null, \"doctor_name\": \"FREDERIC DEMOULE\", \"total_billed\": 180, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Shaan THIAUCOURT\", \"beneficiary_dob\": null, \"invoice_date\": \"27-02-2025\", \"security_number\": null, \"invoice_issuer\": \"FRERERIC DEMOULE PSY & CORPS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de psy\", \"quantity\": 1, \"date_of_service\": \"16-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"1 s\\u00e9ance de psy\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"1 s\\u00e9ance de psy\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ae569b21-0381-4618-9a09-1ee64b3322c9_FACTURE_JANVIER_2025_PSYCHOLOGUE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"929307346\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Patricia Mouillier\", \"total_billed\": 300.0, \"bill_paid\": true, \"amount_paid\": 300.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Marl\\u00e8ne Loison\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Patricia Mouillier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Accompagnement th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}, {\"description\": \"Accompagnement th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"17-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}, {\"description\": \"Accompagnement th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"24-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}, {\"description\": \"Accompagnement th\\u00e9rapeutique\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fe0b1f03-e51b-4bb7-a295-c5794162a6ed_Scan_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"769308107\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laurence ROIG\", \"total_billed\": 55, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DAFOIGU Isabelle\", \"beneficiary_dob\": null, \"invoice_date\": \"02-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Madame Laurence ROIG\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"02-05-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/66619215-a711-45ff-8d93-0991c05398eb_Facture-202503-241_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10009042523\", \"finess_number\": null, \"doctor_name\": \"Salom\\u00e9 Amato\", \"total_billed\": 180.0, \"bill_paid\": true, \"amount_paid\": 180.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Theresa SAM\", \"beneficiary_dob\": null, \"invoice_date\": \"17-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Salom\\u00e9 Amato El\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de suivi de psychologie\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Consultation de suivi de psychologie\", \"quantity\": 1, \"date_of_service\": \"17-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Consultation de suivi de psychologie\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3c72bfde-288a-4ee2-b582-23c503689e79_LC030625_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"149315053\", \"rpps_number\": \"10008621566\", \"finess_number\": null, \"doctor_name\": \"CAREL L\\u00e9a\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEBORGNE Cl\\u00e9ment\", \"beneficiary_dob\": null, \"invoice_date\": \"03-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Madame CAREL L\\u00e9a E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Adultes - Premi\\u00e8re consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"03-06-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5b554cd0-80d9-47c7-a6b0-700fc72fa85a_Facture-THOMAS_JUDITH-PASCAL-LAULERGUE_Christelle-F-2025-041266_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"CHRISTELLE PASCAL-LAULERGUE\", \"total_billed\": 455.0, \"bill_paid\": true, \"amount_paid\": 455.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"THOMAS JUDITH\", \"beneficiary_dob\": null, \"invoice_date\": \"12-04-2025\", \"security_number\": null, \"invoice_issuer\": \"CHRISTELLE PASCAL-LAULERGUE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"24-09-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"05-11-2024\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"18-11-2024\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"02-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"11-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"CONSULTATIONS DE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"10-03-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ced0d2e2-01f7-4641-86ae-f8267d994bec_2025-03-17-facture_13_Fayolle_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339335150\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Elodie Benamer\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fayolle Aurore\", \"beneficiary_dob\": null, \"invoice_date\": \"17-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Mme Elodie Benamer\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi psychologique\", \"quantity\": null, \"date_of_service\": \"17-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ee0a2514-6c62-4ba9-8700-9421ec2ba0ac_CONIN_-_Copie.pdf14_03_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"919315531\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Launay Eug\\u00e9nie\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CONIN Sylvie\", \"beneficiary_dob\": null, \"invoice_date\": \"26-11-2021\", \"security_number\": null, \"invoice_issuer\": \"Launay Eug\\u00e9nie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7f3f262b-eec3-4537-aaed-39a92f809c8e_IMG_20250523_095303_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"919315333\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phane Homeyer\", \"total_billed\": 240.0, \"bill_paid\": true, \"amount_paid\": 240.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Micha Planchais\", \"beneficiary_dob\": null, \"invoice_date\": \"31-01-2025\", \"security_number\": null, \"invoice_issuer\": \"St\\u00e9phane Homeyer\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances suivi psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"08-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"S\\u00e9ances suivi psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"18-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"S\\u00e9ances suivi psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_serice\": \"25-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"S\\u00e9ances suivi psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e0d19e8e-de77-4579-a63c-1eca2e49f392_TheraSomnia-TS0193603F0054882_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"939314530\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Eliana Benard\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Guillaume Granier\", \"beneficiary_dob\": null, \"invoice_date\": \"05-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Th\\u00e9raSomnia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 entretiens de 30mn\", \"quantity\": null, \"date_of_service\": \"05-04-2025\", \"mandatory_coverage\": null, \"amount\": 30.0}, {\"description\": \"2 entretiens de 30mn avec votre psychologue clinicien d\\u00e9di pour vous accompagner dans votre programme et vous aider \\u00e0 retrouver la s\\u00e9rit\\u00e9 du quotidien\", \"quantity\": null, \"date_of_service\": \"05-04-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"Un parcours d'activit\\u00e9s en ligne ( relaxation, gestion du temps, activit\\u00e9 sportive, th\\u00e9rapie comportementale et cognitive)\", \"quantity\": null, \"date_of_service\": \"05-04-2025\", \"mandatory_coverage\": null, \"amount\": 55.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9c43d980-82c8-435b-86f8-a4054ebd3abd_Screenshot_20250612-132725_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"559301890\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille LEICK\", \"total_billed\": 65, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivia GIURLANDO\", \"beneficiary_dob\": null, \"invoice_date\": \"12-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Camille LEICK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"12-06-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/29bbe97d-ca0c-4d73-a92b-59ac2fba93c7_2025-06-03_-_Remboursement_soins_psy_Jade_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": false, \"profession\": null, \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": null, \"total_billed\": null, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": null, \"items\": [], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/5dd8bdb8-4f9a-4e61-9845-ffa1cdcd7c1f_CONSULTATIONS_PSY_2023_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"279306047\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Laure Mann\", \"total_billed\": 1300.0, \"bill_paid\": true, \"amount_paid\": 1300.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Caroline Van Assche\", \"beneficiary_dob\": null, \"invoice_date\": \"18-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Laure Mann\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"02-02-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"27-02-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"12-01-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"19-01-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"20-04-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"16-03-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-05-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"11-05-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-05-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"11-05-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"06-07-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"16-02-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}, {\"description\": \"Psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"09-03-2023\", \"mandatory_coverage\": null, \"amount\": 100.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6cde8bb2-2432-4990-a3f2-df774af7df36_FACTURE_14_FEVRIER_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"449309400\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Stephanie Corsan\", \"total_billed\": 147.0, \"bill_paid\": true, \"amount_paid\": 147.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Besse marie-pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"14-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Stephanie Corsan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"therapie connect/1 visio\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 147.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/472edc36-5dc4-4122-ad6c-3cbc0797ee5c_IMG_4003_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"889303582\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emilie PONCET\", \"total_billed\": 117.75, \"bill_paid\": true, \"amount_paid\": 117.75, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"PARIS Valentin\", \"beneficiary_dob\": null, \"invoice_date\": \"14-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Emilie PONCET\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Soin \\u00e9nerg\\u00e9tique\", \"quantity\": null, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Parfum harmonie \\\"Phylos\\\"\", \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": null, \"amount\": 27.75}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6cde8bb2-2432-4990-a3f2-df774af7df36_FACTURE_14_FEVRIER_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Unknown\", \"adeli_number\": \"449309400\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Stephanie Corsan\", \"total_billed\": 147.0, \"bill_paid\": true, \"amount_paid\": 147.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Besse marie-pierre\", \"beneficiary_dob\": null, \"invoice_date\": \"14-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Stephanie Corsan\", \"currency\": \"EUR\", \"items\": [{\"description\": \"therapie connect/1 visio\", \"quantity\": 1, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 147.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/56828076-3403-40b9-8767-b08084ee4a4f_mme_matelak_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"309300374\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florence Matelak\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MARIE THERESE FABRIGUES\", \"beneficiary_dob\": null, \"invoice_date\": \"21-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychologie de Florence Matelak\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION\", \"quantity\": 1, \"date_of_service\": \"21-03-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"CONSULTATION\", \"quantity\": 1, \"date_of_service\": \"20-03-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/636fa79b-8dee-4464-98c6-4d1793660401_IMG-20250422-WA0002_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10009195685\", \"finess_number\": null, \"doctor_name\": \"Christelle DETABLE\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"SENNORAT Laurence\", \"beneficiary_dob\": null, \"invoice_date\": \"22-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychologie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"05-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"18-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/cfb086d8-545a-40de-abe8-b65b71b6d9a2_20250610_Rbt_part_mutuelle_consultation_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"971450937\", \"rpps_number\": \"10100863751\", \"finess_number\": null, \"doctor_name\": \"Aurelie Yeuillaz\", \"total_billed\": 89.5, \"bill_paid\": true, \"amount_paid\": 44.0, \"mandatory_coverage\": 45.5, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0.0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EPERONNIER, JOHNNY\", \"beneficiary_dob\": \"28-02-1967\", \"invoice_date\": \"10-06-2025\", \"security_number\": \"167029741626358\", \"invoice_issuer\": \"CLINIQUE LES FLAMBOYANTS SUD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation neuropsychiatrie\", \"quantity\": 1, \"date_of_service\": \"10-06-2025\", \"mandatory_coverage\": null, \"amount\": 84.5}, {\"description\": \"Majoration de coordination sp\\u00e9cialiste\", \"quantity\": 1, \"date_of_service\": \"10-06-2025\", \"mandatory_coverage\": null, \"amount\": 5.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/34999dfa-c680-4fb0-8fec-d5f7ee5721ce_FACT_HORTENSE_HIEZ_COMTE_JUIN_25_10062025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"259306181\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie BESSON\", \"total_billed\": 455, \"bill_paid\": true, \"amount_paid\": 455, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"H.HIEZ-COMTE\", \"beneficiary_dob\": null, \"invoice_date\": \"12-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Marie BESSON\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCES DE PSYCHOLOGIE AVEC MME H. HIEZ-COMTE\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"SEANCES DE PSYCHOLOGIE AVEC MME H. HIEZ-COMTE\", \"quantity\": 1, \"date_of_service\": \"24-02-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"SEANCES DE PSYCHOLOGIE AVEC MME H. HIEZ-COMTE\", \"quantity\": 1, \"date_of_service\": \"17-03-2025\", \"mandatory_coverage\": null, \"amount\": 130}, {\"description\": \"SEANCES DE PSYCHOLOGIE AVEC MME H. HIEZ-COMTE\", \"quantity\": 1, \"date_of_service\": \"03-04-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"SEANCES DE PSYCHOLOGIE AVEC MME H. HIEZ-COMTE\", \"quantity\": 1, \"date_of_service\": \"29-04-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"SEANCES DE PSYCHOLOGIE AVEC MME H. HIEZ-COMTE\", \"quantity\": 1, \"date_of_service\": \"19-05-2025\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"SEANCES DE PSYCHOLOGIE AVEC MME H. HIEZ-COMTE\", \"quantity\": 1, \"date_of_service\": \"12-06-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a527168b-c473-4af0-8e8f-0f40da16f1ca_IMG_20250502_144150_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"319319547\", \"rpps_number\": \"10009659987\", \"finess_number\": null, \"doctor_name\": \"CHAVATTE Valentine\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Agn\\u00e8s PARY\", \"beneficiary_dob\": null, \"invoice_date\": \"30-04-2025\", \"security_number\": \"273109407307918\", \"invoice_issuer\": \"CHAVATTE Valentine\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation individuelle\", \"quantity\": 1, \"date_of_service\": \"30-04-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fa04ab78-aeb7-4dca-af46-1473bcb91ef0_Facture_Adrian_Marcelot_2_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"789318656\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Florence Chauvot\", \"total_billed\": 140, \"bill_paid\": true, \"amount_paid\": 140, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Adrian Marcelot\", \"beneficiary_dob\": null, \"invoice_date\": \"05-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Florence Chauvot\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances individuelles de psychologie\", \"quantity\": 1, \"date_of_service\": \"10-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"S\\u00e9ances individuelles de psychologie\", \"quantity\": 1, \"date_of_service\": \"24-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f0cdbaa5-83b6-4cb1-8a5d-3a1adb2f1b6a_17409419952333040640543530601821_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"629311499\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"COUAILLIER NICOLAS\", \"total_billed\": 100.0, \"bill_paid\": true, \"amount_paid\": 100.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Fleuret Nicolas\", \"beneficiary_dob\": null, \"invoice_date\": \"20-02-2025\", \"security_number\": null, \"invoice_issuer\": \"COUAILLIER NICOLAS\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation et prise en charge Psychologique\", \"quantity\": 1, \"date_of_service\": \"18-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"Consultation et prise en charge Psychologique\", \"quantity\": 1, \"date_of_service\": \"22-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2e3670c8-d5d6-42a9-bf93-e990c4f6c631_2025-03-19_facture_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"029304144\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Richard ROKICKI\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Emilie GASPARIK\", \"beneficiary_dob\": null, \"invoice_date\": \"19-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Richard ROKICKI\", \"currency\": \"EUR\", \"items\": [{\"description\": \"1 s\\u00e9ance de psychologie\", \"quantity\": 1, \"date_of_service\": \"19-03-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/07af035d-bd95-47ca-b5cb-be846ca0aad1_IMG_20250307_154818_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"319323366\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie H\\u00e9mard\", \"total_billed\": 272, \"bill_paid\": true, \"amount_paid\": 272, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": null, \"beneficiary_dob\": null, \"invoice_date\": \"19-12-2024\", \"security_number\": null, \"invoice_issuer\": \"Marie H\\u00e9mard\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"28-11-2024\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"05-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"12-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}, {\"description\": \"S\\u00e9ances de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"19-12-2024\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4a60cdb7-ec6e-4a3e-aead-8cc9dfab2d6b_note_dhonoraires_Susie_MARTELOT_190225_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"859305849\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Anne de Bonneval\", \"total_billed\": 65, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Susie MARTELOT\", \"beneficiary_dob\": \"01-05-2005\", \"invoice_date\": \"19-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Anne de Bonneval\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Attestation de consultation\", \"quantity\": 1, \"date_of_service\": \"19-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f75c1ab4-8c78-4a81-9e91-d2bc5912b149_IMG_0965_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339309569\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sol\\u00e8ne TAILL\\u00c9\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Xavier HARISMENDY\", \"beneficiary_dob\": null, \"invoice_date\": \"28-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Sol\\u00e8ne TAILL\\u00c9 - Psychologue clinicienne - Praticienne EMDR\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Tarif Consultation pr\\u00e9sentielle\", \"quantity\": 1, \"date_of_service\": \"28-04-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a5d23197-a19e-4148-b0d3-88306721869c_Facture_LHERBIER_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"9B9302214\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Gr\\u00e9gory LAPU\", \"total_billed\": 240.0, \"bill_paid\": true, \"amount_paid\": 240.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LHERBIER Emma\", \"beneficiary_dob\": null, \"invoice_date\": \"03-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Gr\\u00e9gory LAPU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretien psychologique\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretien psychologique\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretien psychologique\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretien psychologique\", \"quantity\": 1.0, \"date_of_service\": null, \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2e975812-ce83-414b-93ba-16fb5f78f782_Facture_psy_Allem_2025_2_scale_1.5.jpg" }, { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2e975812-ce83-414b-93ba-16fb5f78f782_Facture_psy_Allem_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"959315672\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Selma RAFIK\", \"total_billed\": 450, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nora ALLEM\", \"beneficiary_dob\": null, \"invoice_date\": \"11-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Selma RAFIK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"08-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"16-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"19-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"06-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"13-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/03aa1567-a016-460b-ad51-1bb12d708d00_MIENS_LOUISON_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"789315967\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jean-Christophe ZAPATA\", \"total_billed\": 140, \"bill_paid\": true, \"amount_paid\": 140, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MIENS LOUISON\", \"beneficiary_dob\": null, \"invoice_date\": \"05-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Jean-Christophe ZAPATA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Deux consultations psychoth\\u00e9rapeutiques\", \"quantity\": 1, \"date_of_service\": \"15-11-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Deux consultations psychoth\\u00e9rapeutiques\", \"quantity\": 1, \"date_of_service\": \"05-05-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/dd0846a4-3a22-4a96-a162-c401f8829757_1292024_jacquemart_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10009277301\", \"finess_number\": null, \"doctor_name\": \"Vandycke Aur\\u00e9lie\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jacquenait David\", \"beneficiary_dob\": null, \"invoice_date\": \"14-08-2021\", \"security_number\": null, \"invoice_issuer\": \"Vandycke Aur\\u00e9lie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation individuelle\", \"quantity\": 1, \"date_of_service\": \"11-08-2024\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e9dbb6df-4146-4424-b9e2-64db6a59d0d9_PSY_2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"949333835\", \"rpps_number\": \"10008515081\", \"finess_number\": null, \"doctor_name\": \"Ars\\u00e8ne Tellier\", \"total_billed\": 240.0, \"bill_paid\": true, \"amount_paid\": 240.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Ludwig Boucher\", \"beneficiary_dob\": null, \"invoice_date\": \"17-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Ars\\u00e8ne Tellier\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"16-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Consultation de psychologie\", \"quantity\": 1, \"date_of_service\": \"13-03-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2f3f46bc-ac62-4f75-a20e-8479ba7b967d_IMG_20250311_080913_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"499302586\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Claire FICHANT\", \"total_billed\": 120.0, \"bill_paid\": true, \"amount_paid\": 120.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Adeline SIMON\", \"beneficiary_dob\": null, \"invoice_date\": \"10-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Claire FICHANT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 40.0}, {\"description\": \"Consultations\", \"quantity\": 1, \"date_of_service\": \"19-02-2025\", \"mandatory_coverage\": null, \"amount\": 40.0}, {\"description\": \"Consultations\", \"quantity\": 1, \"date_of_service\": \"06-03-2025\", \"mandatory_coverage\": null, \"amount\": 40.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/88956210-081c-44db-afc9-975f39370595_Facture_Laure_Brun1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"269303319\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Ang\\u00e8le FLORES-GENRE\", \"total_billed\": 70, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laure BRUN\", \"beneficiary_dob\": null, \"invoice_date\": \"08-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Ang\\u00e8le FLORES-GENRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION\", \"quantity\": 1, \"date_of_service\": \"08-04-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/30aab30a-311b-4d7b-a9ec-322629005f35_facture_psychologie_BECU_Laurene_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759325814\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Myrto HADJIGEORGIOU\", \"total_billed\": 480, \"bill_paid\": true, \"amount_paid\": 480, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Laur\\u00e8ne BECU\", \"beneficiary_dob\": null, \"invoice_date\": \"31-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Myrto HADJIGEORGIOU\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"03-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"10-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"17-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"24-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"31-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8af6cd35-3496-4587-a17a-ea03d972cbd8_IMG_9832_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"089300099\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Emmanuelle Roche-GAZQUEZ\", \"total_billed\": 135, \"bill_paid\": true, \"amount_paid\": 135, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Luca Fontabone Pointel\", \"beneficiary_dob\": null, \"invoice_date\": \"26-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de psychologue Emmanuelle Roche-GAZQUEZ\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": 45}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/2dd7feb3-73bb-4492-90a0-c0cb3b184020_Psy_2705_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"149315053\", \"rpps_number\": \"10008621566\", \"finess_number\": null, \"doctor_name\": \"CAREL L\\u00e9a\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOSSELIN Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"27-05-2025\", \"security_number\": null, \"invoice_issuer\": \"CAREL L\\u00e9a E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Adultes - Consultation de suivi de psychologie\", \"quantity\": 1, \"date_of_service\": \"27-05-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6fee27bf-ff64-4305-bfc9-18bb6749bb9b_FACTURE_PSYCHIATRE_1_scale_0.4.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10101108990\", \"finess_number\": null, \"doctor_name\": \"BULEUX Mathieu\", \"total_billed\": 85.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": 29.75, \"complementary_coverage\": 0.0, \"client_part\": 55.25, \"remaining_payment\": null, \"insured_name\": \"DE POOTER Pascal\", \"insured_dob\": \"08-01-1972\", \"beneficiary_name\": \"DE POOTER Pascal\", \"beneficiary_dob\": \"08-01-1972\", \"invoice_date\": \"19-06-2025\", \"security_number\": \"172015935021043\", \"invoice_issuer\": \"CABINET DU DR MATHIEU BULEUX\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Acte CP\", \"quantity\": 1, \"date_of_service\": \"19-06-2025\", \"mandatory_coverage\": 29.75, \"amount\": 85.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/99d5efad-5fae-44a1-8740-0c63afae1c7d_facture_psy_Lola_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"459302162\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"GAUDICHON C\\u00e9lia\", \"total_billed\": 40, \"bill_paid\": true, \"amount_paid\": 40, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MAUFRAIS Lola\", \"beneficiary_dob\": \"22-12-2010\", \"invoice_date\": \"28-03-2025\", \"security_number\": null, \"invoice_issuer\": \"GAUDICHON C\\u00e9lia\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"28-03-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/526843b2-2326-4058-b8c5-ea57571688e7_LC120625_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"149315053\", \"rpps_number\": \"10008621566\", \"finess_number\": null, \"doctor_name\": \"CAREL L\\u00e9a\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LEBORGNE Cl\\u00e9ment\", \"beneficiary_dob\": null, \"invoice_date\": \"12-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Madame CAREL L\\u00e9a E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Adultes - Consultation de suivi de psychologie\", \"quantity\": 1, \"date_of_service\": \"12-06-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b1334002-e912-4605-9afe-ef830039637a_factureDELEGLISE2025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10008543422\", \"finess_number\": null, \"doctor_name\": \"Fernando Luiz SALGADO DA SILVA\", \"total_billed\": 240, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Thierry DELEGLISE\", \"beneficiary_dob\": null, \"invoice_date\": \"15-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Fernando Luiz SALGADO DA SILVA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"19-02-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"19-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"16-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"14-05-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a2894d3b-e5a7-4a28-9bd9-98de9f406a2d_facturevgirard-1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"9D9306617\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Grauwin Lo\\u00efc\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vincent GIRARD\", \"beneficiary_dob\": null, \"invoice_date\": \"02-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Grauwin Lo\\u00efc\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"15-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"28-04-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/1a076d8a-4f9c-4cb5-bdd1-5544f8b3e723_Leherisse_Camille_facture_2025-03-17_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339313769\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marion Hazera\", \"total_billed\": 80, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Leh\\u00e9riss\\u00e9 Camille\", \"beneficiary_dob\": null, \"invoice_date\": \"17-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Marion Hazera\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"17-03-2025\", \"mandatory_coverage\": null, \"amount\": 80}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/892b99c9-bfbc-431a-94d8-adc38bbac97c_PXL_20250617_083450627_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"559301890\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Camille LEICK\", \"total_billed\": 65, \"bill_paid\": true, \"amount_paid\": 65, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Olivia GIURLANDO\", \"beneficiary_dob\": null, \"invoice_date\": \"12-06-2025\", \"security_number\": null, \"invoice_issuer\": \"Camille LEICK\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance\", \"quantity\": 1, \"date_of_service\": \"12-06-2025\", \"mandatory_coverage\": null, \"amount\": 65}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9e5b7c3f-1a50-49ce-9057-90bdae156a9d_Facture_SAFIER_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10008448234\", \"finess_number\": null, \"doctor_name\": \"David SAFIER\", \"total_billed\": 280, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Gr\\u00e9gory Raspal\", \"beneficiary_dob\": null, \"invoice_date\": \"26-03-2025\", \"security_number\": null, \"invoice_issuer\": \"David SAFIER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"04-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"14-03-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"26-03-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/3688f75c-7e6a-4e41-9ae5-397833aa4765_doc00505420250327083441_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"069316198\", \"rpps_number\": \"10010024320\", \"finess_number\": null, \"doctor_name\": \"VAILLANT-CISZEWICZ Anne-Julie\", \"total_billed\": 85, \"bill_paid\": false, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"ROPARS David\", \"beneficiary_dob\": null, \"invoice_date\": \"26-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Madame VAILLANT-CISZEWICZ Anne-Julie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"26-03-2025\", \"mandatory_coverage\": null, \"amount\": 85}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fcf38b4a-c7b5-43ea-a3dc-68528fbf562f_Screenshot_20250224-142611_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339302366\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"St\\u00e9phane SOURY\", \"total_billed\": 250, \"bill_paid\": true, \"amount_paid\": 250, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Juliane ALLENO\", \"beneficiary_dob\": null, \"invoice_date\": \"18-02-2025\", \"security_number\": null, \"invoice_issuer\": \"St\\u00e9phane SOURY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"22-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"4-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"11-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"18-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/36a93032-2849-4793-a3b0-8e4b0a6ca9a0_facture_DELCAMP_CANO_Ruben_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"069322345\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Muriel Vayssi\\u00e9\", \"total_billed\": 280, \"bill_paid\": true, \"amount_paid\": 280, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Coran Marilyn\", \"beneficiary_dob\": null, \"invoice_date\": \"27-02-2025\", \"security_number\": null, \"invoice_issuer\": \"Muriel Vayssie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"27-02-2025\", \"mandatory_coverage\": null, \"amount\": 70}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/52e644f0-f6e5-44cf-8f9b-bdf16ab47537_Facture_Psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"149315053\", \"rpps_number\": \"10008621566\", \"finess_number\": null, \"doctor_name\": \"CAREL L\\u00e9a\", \"total_billed\": 60, \"bill_paid\": true, \"amount_paid\": 60, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": 0, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GOSSELIN Marie\", \"beneficiary_dob\": null, \"invoice_date\": \"27-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Madame CAREL L\\u00e9a E.I\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Adultes - Consultation de suivi de psychologie\", \"quantity\": 1, \"date_of_service\": \"27-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a4e7e3d4-36e6-4253-ae25-c3b333ea8159_SF_INBOX_42762_7763c6_FACTURE-25-02-463-DELARUE_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"609312558\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HELLAL MARINNE\", \"total_billed\": 140.0, \"bill_paid\": true, \"amount_paid\": 140.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Johan DELARUE\", \"beneficiary_dob\": null, \"invoice_date\": \"26-02-2025\", \"security_number\": null, \"invoice_issuer\": \"HELLAL MARINNE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CONSULTATION PSYCHOLOGIQUE\", \"quantity\": 1.0, \"date_of_service\": \"31-01-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}, {\"description\": \"CONSULTATION PSYCHOLOGIQUE\", \"quantity\": 1.0, \"date_of_service\": \"14-02-2025\", \"mandatory_coverage\": null, \"amount\": 70.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7371088b-ff8f-4fe6-aca2-8fb115003698_IMG_20250211_111124_1_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"649308657\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sandrine Mendizabal\", \"total_billed\": 210, \"bill_paid\": true, \"amount_paid\": 210, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Lucas Santos Bolle\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": \"Sandrine Mendizabal\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi psychologique\", \"quantity\": null, \"date_of_service\": \"20-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Suivi psychologique\", \"quantity\": null, \"date_of_service\": \"04-11-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Suivi psychologique\", \"quantity\": null, \"date_of_service\": \"18-12-2024\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"Suivi psychologique\", \"quantity\": null, \"date_of_service\": \"28-12-2024\", \"mandatory_coverage\": null, \"amount\": 30}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/feae0ee2-7412-4148-b0b3-62337cfc14bb_feuilles_de_soin_acquittee_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"971476197\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"J.F. REVERZY\", \"total_billed\": 160.0, \"bill_paid\": true, \"amount_paid\": 70.0, \"mandatory_coverage\": 63.0, \"complementary_coverage\": 27.0, \"client_part\": 70.0, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EUPHRASIE KEVIN\", \"beneficiary_dob\": \"19-04-1990\", \"invoice_date\": \"26-05-2025\", \"security_number\": \"190049741152119\", \"invoice_issuer\": \"Dr J.F. REVERZY\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CNP1,5\", \"quantity\": null, \"date_of_service\": \"26-05-2025\", \"mandatory_coverage\": 63.0, \"amount\": 160.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/fa6595be-294f-4b13-a215-103a714f6662_invoice_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"750075798\", \"rpps_number\": \"10002329547\", \"finess_number\": null, \"doctor_name\": \"Ludovic CHRISTEN\", \"total_billed\": 50.2, \"bill_paid\": true, \"amount_paid\": 15.06, \"mandatory_coverage\": 35.14, \"complementary_coverage\": null, \"client_part\": 15.06, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Jeanne TEKIELI\", \"beneficiary_dob\": \"18-01-2005\", \"invoice_date\": \"19-06-2025\", \"security_number\": \"205015439569425\", \"invoice_issuer\": \"Qare\", \"currency\": \"EUR\", \"items\": [{\"description\": \"TCS - T\\u00e9leconsultation du m\\u00e9decin specialiste psychiatrie, neuropsychiatrie ou neurologue\", \"quantity\": 1, \"date_of_service\": \"19-06-2025\", \"mandatory_coverage\": null, \"amount\": 50.2}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9c334320-209f-488d-806b-92af6cfcef30_Facture_psychologue_2025_rembourst_4_seeances_a_60e_001_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"849306162\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline GOUBERT\", \"total_billed\": 240, \"bill_paid\": true, \"amount_paid\": 240, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Miollan Maryse\", \"beneficiary_dob\": null, \"invoice_date\": \"04-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Caroline GOUBERT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ances de suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"17-01-2025\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"s\\u00e9ances de suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"15-02-2025\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"s\\u00e9ances de suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"01-03-2025\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"s\\u00e9ances de suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"13-03-2025\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"s\\u00e9ances de suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"28-03-2025\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"s\\u00e9ances de suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"18-04-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e925faa1-5223-4004-92fc-f96b3b397739_img20250503_18205673_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"809306376\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LAPIERRE Audrey\", \"total_billed\": 60, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"BLANCHOT Yann\", \"beneficiary_dob\": null, \"invoice_date\": \"29-03-2025\", \"security_number\": null, \"invoice_issuer\": \"LAPIERRE Audrey\", \"currency\": \"EUR\", \"items\": [{\"description\": \"s\\u00e9ance de individuelle \", \"quantity\": 1, \"date_of_service\": \"29-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4e95b973-5730-459d-8b49-cf534f43cac7_Facture_2_seances_psy_-_Laura_GLIBERT__1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": \"10110354890\", \"finess_number\": null, \"doctor_name\": \"Aylin AYAZ\", \"total_billed\": 150.0, \"bill_paid\": true, \"amount_paid\": 150.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"GLIBERT Laura\", \"beneficiary_dob\": \"04-08-2002\", \"invoice_date\": \"15-05-2025\", \"security_number\": null, \"invoice_issuer\": \"NovaPsy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance psychologique du 15/05/2025\", \"quantity\": 1, \"date_of_service\": \"15-05-2025\", \"mandatory_coverage\": null, \"amount\": 75.0}, {\"description\": \"S\\u00e9ance psychologique du 15/05/2025\", \"quantity\": 1, \"date_of_service\": \"15-05-2025\", \"mandatory_coverage\": null, \"amount\": 75.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b77ac638-515c-4bc1-9d1e-77e862f9002a_Facture_2025-53_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"359305877\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LE GRAND Marie\", \"total_billed\": 100, \"bill_paid\": true, \"amount_paid\": 100, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LE COROLLER Marie Jos\\u00e9\", \"beneficiary_dob\": null, \"invoice_date\": \"29-04-2025\", \"security_number\": null, \"invoice_issuer\": \"LE GRAND Marie\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"04-04-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Suivi psychologique\", \"quantity\": 1, \"date_of_service\": \"29-04-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d2c20f79-1cf6-4a3d-8f6d-3e2021b5d4ee_IMG_1067_1_scale_0.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339309569\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Sol\\u00e8ne TAILL\\u00c9\", \"total_billed\": 55, \"bill_paid\": true, \"amount_paid\": 55, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Xavier HARISMENDY\", \"beneficiary_dob\": null, \"invoice_date\": \"27-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Sol\\u00e8ne TAILL\\u00c9\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Tarif Consultation pr\\u00e9sentielle\", \"quantity\": 1, \"date_of_service\": \"27-05-2025\", \"mandatory_coverage\": null, \"amount\": 55}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f9c8d443-8d14-4456-b3b8-2a3ab4951b6c_Facture_POURREAU_Solenne_MAI_25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"139318232\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Caroline Roy\", \"total_billed\": 80, \"bill_paid\": true, \"amount_paid\": 80, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"POURREAU Solenne\", \"beneficiary_dob\": null, \"invoice_date\": \"26-05-2025\", \"security_number\": null, \"invoice_issuer\": \"Caroline Roy\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations individuelles \\u00e0 l'attention de Mme POURREAU Solenne\", \"quantity\": 1, \"date_of_service\": \"09-05-2025\", \"mandatory_coverage\": null, \"amount\": 40}, {\"description\": \"Consultations individuelles \\u00e0 l'attention de Mme POURREAU Solenne\", \"quantity\": 1, \"date_of_service\": \"26-05-2025\", \"mandatory_coverage\": null, \"amount\": 40}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/be829c29-7c16-413a-8a40-94c8e53876ae_facture_2025_-_38_Charlotte_Pelat_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759357197\", \"rpps_number\": \"10008956947\", \"doctor_name\": \"Agn\\u00e8s Falabr\\u00e8gues\", \"total_billed\": 180.0, \"bill_paid\": true, \"amount_paid\": 180.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Charlotte Pelat\", \"beneficiary_dob\": null, \"invoice_date\": \"04-04-2025\", \"security_number\": null, \"invoice_issuer\": \"Agn\\u00e8s Falabr\\u00e8gues\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultations 1h\", \"quantity\": 1, \"date_of_service\": \"25-03-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}, {\"description\": \"Consultations 1h\", \"quantity\": 1, \"date_of_service\": \"01-04-2025\", \"mandatory_coverage\": null, \"amount\": 90.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/82579f0b-31df-4fe0-ad9a-03a607e3cf38_IMG_2866_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"689304590\", \"rpps_number\": \"10009380287\", \"finess_number\": null, \"doctor_name\": \"Laurence RICHTER\", \"total_billed\": 240.0, \"bill_paid\": null, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Hassania DOUAI\", \"beneficiary_dob\": null, \"invoice_date\": \"10-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Laurence RICHTER\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Entretiens psychoth\\u00e9rapiques du 09/01/2025 ; 30/01/2025 ; 14/02/2025 ; 06/03/2025\", \"quantity\": 1, \"date_of_service\": \"09-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretiens psychoth\\u00e9rapiques du 09/01/2025 ; 30/01/2025 ; 14/02/2025 ; 06/03/2025\", \"quantity\": 1, \"date_of_service\": \"30-01-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretiens psychoth\\u00e9rapiques du 09/01/2025 ; 30/01/2025 ; 14/02/2025 ; 06/03/2025\", \"quantity\": 1, \"date_of_service\": \"14-02-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}, {\"description\": \"Entretiens psychoth\\u00e9rapiques du 09/01/2025 ; 30/01/2025 ; 14/02/2025 ; 06/03/2025\", \"quantity\": 1, \"date_of_service\": \"06-03-2025\", \"mandatory_coverage\": null, \"amount\": 60.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/ea269ad7-d799-4643-ba4b-305cc0e03296_Facture_psy_19032025_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"389308479\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christel BAUNF\", \"total_billed\": 200.0, \"bill_paid\": true, \"amount_paid\": 200.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"VEUILLET Camille\", \"beneficiary_dob\": null, \"invoice_date\": null, \"security_number\": null, \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"08-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"22-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"05-02-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"Consultation psychologique\", \"quantity\": 1, \"date_of_service\": \"19-03-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/4a3ff4e4-1c77-40c1-9af9-0064331d2c09_IMG_20250227_090905_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"319319547\", \"rpps_number\": \"10009659987\", \"finess_number\": null, \"doctor_name\": \"CHAVATTE VALENTINE\", \"total_billed\": 41, \"bill_paid\": true, \"amount_paid\": 41, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Pary Agn\\u00e8s\", \"beneficiary_dob\": null, \"invoice_date\": \"26-02-2025\", \"security_number\": null, \"invoice_issuer\": \"CENTRE DE L'OBESITE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation individuelle\", \"quantity\": 1, \"date_of_service\": \"26-02-2025\", \"mandatory_coverage\": null, \"amount\": 41}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d8cb9012-972e-429a-a099-0099a5bd7d6e_Attestation_mutuelle_M.LUCAS_Julien_01-25_1_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"339302606\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Marie-Line RENARD\", \"total_billed\": 100, \"bill_paid\": true, \"amount_paid\": 100, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LUCAS Julien\", \"beneficiary_dob\": \"27-09-1986\", \"invoice_date\": \"23-01-2025\", \"security_number\": null, \"invoice_issuer\": \"Cabinet de Psychologie & de Sant\\u00e9 Mentale au Travail\", \"currency\": \"EUR\", \"items\": [{\"description\": \"consultation individuelle de souffrance au travail\", \"quantity\": 1, \"date_of_service\": \"10-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"consultation individuelle de souffrance au travail\", \"quantity\": 1, \"date_of_service\": \"23-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/758e3dfb-e217-4cc8-aeea-dcbd8348dfdb_IMG_1294_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": null, \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Alexandra DENOYELLE\", \"total_billed\": 120, \"bill_paid\": true, \"amount_paid\": 120, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"EMMA EURIN\", \"beneficiary_dob\": null, \"invoice_date\": \"31-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Alexandra DENOYELLE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SEANCE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"10-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}, {\"description\": \"SEANCE PSYCHOTHERAPIE\", \"quantity\": 1, \"date_of_service\": \"31-03-2025\", \"mandatory_coverage\": null, \"amount\": 60}], \"prescripteur_finess_number\": null, \"is_handwriting\": false, \"is_rotated\": false}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/a742dc29-6914-43ef-9040-e51de050215c_IMG_4055_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"649306651\", \"rpps_number\": \"10009615211\", \"finess_number\": null, \"doctor_name\": \"Candice VERON MAUDOIGT\", \"total_billed\": 250, \"bill_paid\": true, \"amount_paid\": 250, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Vieillescazes Olga\", \"beneficiary_dob\": null, \"invoice_date\": \"17-03-2025\", \"security_number\": null, \"invoice_issuer\": \"Candice VERON MAUDOIGT\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie du 06 janvier 2025\", \"quantity\": 1, \"date_of_service\": \"06-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie du 20 janvier 2025\", \"quantity\": 1, \"date_of_service\": \"20-01-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie du 03 f\\u00e9vrier 2025\", \"quantity\": 1, \"date_of_service\": \"03-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie du 03 mars 2025\", \"quantity\": 1, \"date_of_service\": \"03-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie du 17 mars 2025\", \"quantity\": 1, \"date_of_service\": \"17-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/b917013e-9285-4a98-9f61-f8eec0e6a66d_Fiche_de_soin_15.03.25_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759356884\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ZHU YIFEI\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"LEFRANC Elorence\", \"insured_dob\": \"27-02-1992\", \"beneficiary_name\": \"LEFRANC Elorence\", \"beneficiary_dob\": \"27-02-1992\", \"invoice_date\": \"15-03-2025\", \"security_number\": \"292025927130939\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"05-04-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/f5291dbb-2f46-4a61-9eb6-a5cd9b6ae81a_Dispositif_mon_Psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"929326494\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BEN DAAYA SALOUA\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"DAVOUDI Fahimeh\", \"insured_dob\": \"25-03-1968\", \"beneficiary_name\": \"Davoudi Fakhreh\", \"beneficiary_dob\": \"25-03-1968\", \"invoice_date\": \"18-03-2022\", \"security_number\": \"268039920404993\", \"invoice_issuer\": \"BEN DAAYA SALOUA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"S\\u00e9ance de psychoth\\u00e9rapie\", \"quantity\": 1, \"date_of_service\": \"18-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/8cc76d23-795e-48fc-947b-4f17cf5f9f6b_FACTURE_PSY_VAL_1_1_scale_0.7.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"709300693\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Christine ANSELEM\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"JELIE Aude\", \"insured_dob\": null, \"beneficiary_name\": \"AUBERT Valentin\", \"beneficiary_dob\": \"08-11-2013\", \"invoice_date\": \"02-03-2025\", \"security_number\": \"279077055008177\", \"invoice_issuer\": \"Christine ANSELEM\", \"currency\": \"EUR\", \"items\": [{\"description\": \"EEP\", \"quantity\": null, \"date_of_service\": \"02-03-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/e4a00d11-d2cc-46bd-bdb1-e28e6f85ac33_doc12584820250430152210_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"069324788\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"SCOLARI DAMIEN\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"DEHILI Nora\", \"beneficiary_dob\": null, \"invoice_date\": \"30-04-2025\", \"security_number\": \"276028302301750\", \"invoice_issuer\": \"SCOLARI DAMIEN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"30-04-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d67e4a4f-fd88-4453-a63c-33d00016a710_IMG_20250610_174409_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychiatrie\", \"adeli_number\": \"751932864\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"REMY DEBRUN\", \"total_billed\": 100, \"bill_paid\": true, \"amount_paid\": 100, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"FREVILLE Nicolas\", \"beneficiary_dob\": \"19-03-1993\", \"invoice_date\": \"10-06-2025\", \"security_number\": \"193036204132668\", \"invoice_issuer\": \"REMY DEBRUN\", \"currency\": \"EUR\", \"items\": [{\"description\": \"CNP\", \"quantity\": null, \"date_of_service\": \"10-06-2025\", \"mandatory_coverage\": null, \"amount\": 100}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/eb766f71-f399-44c0-8d36-895f4a440548_CamScanner_11-02-2025_14.09_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"839301041\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DEOUS YVELINE\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"NAFISSA GIRAUD\", \"insured_dob\": \"05-01-1986\", \"beneficiary_name\": \"NAFISSA GIRAUD\", \"beneficiary_dob\": \"05-01-1986\", \"invoice_date\": \"22-11-2024\", \"security_number\": \"286011305517030\", \"invoice_issuer\": \"DEOUS YVELINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"27-12-2024\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/9c3f39c0-af04-46f7-971f-07c93aca05b9_feuille_se_soin_psychologue_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"759360514\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"ROMITI GHJUVANNA\", \"total_billed\": 200, \"bill_paid\": true, \"amount_paid\": 200, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"TRINGALI ANAURY\", \"beneficiary_dob\": \"11-04-1987\", \"invoice_date\": \"04-05-2025\", \"security_number\": \"187048306104178\", \"invoice_issuer\": \"MADAME ROMITI GHJUVANNA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"EEP\", \"quantity\": null, \"date_of_service\": \"14-05-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"21-05-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"28-05-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}, {\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"04-06-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/7adda4bb-e0e5-4d24-9f05-35fcb244e9ae_IMG_20250506_140735_1_scale_0.6.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"139337448\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Michel Dominique\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"MONIER Fanny\", \"beneficiary_dob\": \"13-11-1990\", \"invoice_date\": \"08-02-2025\", \"security_number\": \"290111300505062\", \"invoice_issuer\": \"Michel Dominique\", \"currency\": \"EUR\", \"items\": [{\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"08-02-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/c3194b14-92ec-4f41-a2dc-582c6e28a252_feuille_de_soin_psy_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"029304771\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Jordan LEFEBVRE\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"CAMINATI M\\u00e9gane\", \"beneficiary_dob\": \"24-05-1994\", \"invoice_date\": \"16-04-2025\", \"security_number\": \"294055432930141\", \"invoice_issuer\": \"Jordan LEFEBVRE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"EEP\", \"quantity\": null, \"date_of_service\": \"16-04-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/77ff8a6e-6cb7-4e8f-b80c-dbd245cf3728_20250412_103057 copy_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"349327601\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Barros Soares Elsa\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"HOURNIEZ Audrey\", \"insured_dob\": \"13-11-1980\", \"beneficiary_name\": \"HOURNIEZ Audrey\", \"beneficiary_dob\": \"13-11-1980\", \"invoice_date\": \"31-03-2025\", \"security_number\": \"280116204804380\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"EEP\", \"quantity\": null, \"date_of_service\": \"31-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/0f056d2e-af3f-4297-98d9-a0bba103a671_Feuille_de_soins_psy_2_Camille_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"349311449\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"BEQUE C\\u00e9line\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"POURCHOT-SAURA Camille\", \"insured_dob\": \"23-08-2015\", \"beneficiary_name\": \"POURCHOT-SAURA Camille\", \"beneficiary_dob\": \"23-08-2015\", \"invoice_date\": \"12-03-2025\", \"security_number\": \"278056806607131\", \"invoice_issuer\": \"BEQUE C\\u00e9line\", \"currency\": \"EUR\", \"items\": [{\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"12-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}, {\"description\": \"Consultation\", \"quantity\": 1, \"date_of_service\": \"26-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/6c7aab9a-6a37-428b-90b8-72a01611d6ba_Screenshot_2025-06-14-13-56-41-690_com.miui.gallery-edit_1_scale_1.0.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"331050831\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"Monique MOREL-VAGNER\", \"total_billed\": 50.2, \"bill_paid\": true, \"amount_paid\": null, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"N\\u00c9ZONDET St\\u00e9phane\", \"beneficiary_dob\": \"26-09-1971\", \"invoice_date\": \"12-06-2025\", \"security_number\": \"171094019208965\", \"invoice_issuer\": null, \"currency\": \"EUR\", \"items\": [{\"description\": \"TCS\", \"quantity\": null, \"date_of_service\": \"12-06-2025\", \"mandatory_coverage\": null, \"amount\": 50.2}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/d487dfb6-e4b2-44d1-82d6-a71c9433c49a_psy_1_scale_1.5.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"449310481\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"HELENE CHOTARD\", \"total_billed\": 50, \"bill_paid\": true, \"amount_paid\": 50, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"LETERTRE Anthony\", \"beneficiary_dob\": \"03-04-1979\", \"invoice_date\": \"10-03-2025\", \"security_number\": \"179045009901157\", \"invoice_issuer\": \"HELENE CHOTARD\", \"currency\": \"EUR\", \"items\": [{\"description\": \"EEP\", \"quantity\": null, \"date_of_service\": \"10-03-2025\", \"mandatory_coverage\": null, \"amount\": 50}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/aeef6346-9d89-426d-96bb-32c701bc9f1e_CamScanner_11-02-2025_14.07_1_scale_0.8.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"839301041\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"DEOUS YVELINE\", \"total_billed\": 50.0, \"bill_paid\": true, \"amount_paid\": 50.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": null, \"insured_dob\": null, \"beneficiary_name\": \"Nafissa GIRAUD\", \"beneficiary_dob\": \"05-01-1986\", \"invoice_date\": \"27-01-2025\", \"security_number\": \"286011305517030\", \"invoice_issuer\": \"DEOUS YVELINE\", \"currency\": \"EUR\", \"items\": [{\"description\": \"APS\", \"quantity\": null, \"date_of_service\": \"27-01-2025\", \"mandatory_coverage\": null, \"amount\": 50.0}], \"prescripteur_finess_number\": null}" } ], [ { "role": "system", "content": "You are an advanced AI agent created by Rizlum AI. You are designed to extract structured information from health invoices with high accuracy. Your task is to parse invoices and return only the requested fields in a strict JSON format.\n\n### **General Instructions**\n1. **Extract Only the Specified Fields**: Do not include extra information.\n2. **Do Not Guess or hallucinate if information is missing or represented by placeholders (e.g., dots, dashes).**\n3. **Ignore irrelevant fields (e.g., address, SIRET, membership numbers).**.\n4. **Ensure Strictly Valid JSON Output**: Do not return additional text or explanations.\n5. **Field Relationship Guidance**: Formula: total_bill = mandatory_coverage + complementary_coverage + client_part. Instruction: Prioritize extracting all values directly and only if they appear on the invoice. This formula is a guide to verify the consistency of extracted numbers, not a command to calculate a missing total_bill\n\n### **Handling Ambiguous Cases**\n- **Adeli Number**: If a 9-digit number appears without the keyword 'Adeli', check if it matches the Adeli number format and is associated with a recognized healthcare professional.\n- **Doctor Selection**:\n - If the invoice shows multiple doctors, exclude any doctor that is visibly crossed out.\n - Prioritize doctor information (e.g., name, Adeli, RPPS) within a stamp (identified by visual stamp features like borders or official markings) over unstamped doctor blocks. Exclude unstamped doctor information if a stamped block exists.\n- **Item Selection in Tables**:\n - If multiple items or acts are listed, extract only those that are highlighted (e.g., marked with color).\n - Ignore all other items that are not explicitly marked or priced.\n- **Date**:\n - Distinguish carefully between similar characters: treat '/1' as '1' (e.g., January), not '11' (e.g., November), by focusing on stroke separation and context rather than assuming a slash implies a specific number.\n" }, { "role": "user", "content": [ { "type": "image", "image": "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1/bec6226a-2abc-41fd-a602-1f9a077a5fab_CamScanner_11-02-2025_14.57_1_scale_0.9.jpg" }, { "type": "text", "text": "Extract the following structured information from the provided invoice. Fill in only existing values.\nStrictly return a valid JSON following this schema:\n\n**Json schema**\n{\n \"type\": \"object \",\n \"properties\": {\n \"is_bill\": {\n \"type\": \"boolean\",\n \"description\": \"True if the document is an invoice, false otherwise.\"\n },\n \"profession\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Type of healthcare profession, if it is presented in the list [Optique, Kin\u00e9siologie, Kin\u00e9sith\u00e9rapie, Pharmacie, Biologie, Psychologie, Infirmier, Ost\u00e9opathie, Dentaire, Sage-femme, Sophrologie, Soins hospitaliers, Orthop\u00e9die, Podologie, Di\u00e9t\u00e9tique, Radiologie, Orthophonie, P\u00e9diatrie, Assurance Maladie, Pompes fun\u00e8bres, Laboratoire, Gyn\u00e9cologie-obst\u00e9trique, Chiropractie, Psychomotricit\u00e9, Ost\u00e9odensitom\u00e9trie, Pneumologie, Vaccins, Sevrage tabagique, Contraception, Hom\u00e9opathie, Acupunture], Unknown otherwise.\"\n },\n \"adeli_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Adeli number (9-digit identifier) associated with the healthcare provider\"\n },\n \"rpps_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"11 digits identifier, indicated after the term 'RPPS'\"\n },\n \"finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"9 digits identifier, indicated after one of the terms in list ['finess', 'identifiant CPAM']\"\n },\n \"doctor_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the doctor\"\n },\n \"prescripteur_finess_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Finess number of the prescriber in the invoice (9 digits identifier, indicated after the term 'finess')\"\n },\n \"total_billed\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The total amount billed on the invoice\"\n },\n \"bill_paid\": {\n \"type\": \"boolean\",\n \"description\": \"True if the invoice has been paid, false otherwise (Look for terms like: 'acquitt\u00e9e', 'pay\u00e9e', 'quittance', 'r\u00e9gl\u00e9e', 'certifie avoir re\u00e7u le r\u00e8glement')\"\n },\n \"amount_paid\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The amount paid for the invoice\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by compulsory health insurance (indicated after terms like 'AMO', 'Rbmt RO', 'CAISSE', 'Noemie', etc.)\"\n },\n \"complementary_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by complementary insurance (indicated after terms like 'AMC', 'RC', 'Mutuelle')\"\n },\n \"client_part\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount paid by client (indicated after terms like 'ASSURE', 'Part Client', 'Part Assur\u00e9')\"\n },\n \"remaining_payment\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"The remaining balance to be paid by the beneficiary if the invoice is unpaid.\"\n },\n \"insured_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the insured person (indicated after terms like 'Assure')\"\n },\n \"insured_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the insured person (format: dd-mm-yyyy)\"\n },\n \"beneficiary_name\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Full name of the invoice beneficiary\"\n },\n \"beneficiary_dob\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of birth of the beneficiary (format: dd-mm-yyyy)\"\n },\n \"invoice_date\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of the invoice (format: dd-mm-yyyy)\"\n },\n \"security_number\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Social Security number (13 or 15 digit identifier, indicated after terms like 'S\u00e9curit\u00e9 Social' ou 'N\u00b0 INSEE' ou 'N\u00b0 SS')\"\n },\n \"invoice_issuer\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Name or organization issuing the invoice or providing the service\"\n },\n \"currency\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Currency used (e.g., EUR, USD)\"\n },\n \"items\": {\n \"type\": \"array\",\n \"description\": \"List of items or services included in the invoice.\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"description\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Description of the item or service.\"\n },\n \"quantity\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Quantity of the item or service.\"\n },\n \"date_of_service\": {\n \"type\": [\"string\", \"null\"],\n \"description\": \"Date of service (when the item was provided), in format dd-mm-yyyy.\"\n },\n \"mandatory_coverage\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Amount covered by mandatory health insurance for this item.\"\n },\n \"amount\": {\n \"type\": [\"number\", \"null\"],\n \"description\": \"Total amount for the item (unit price * quantity).\"\n }\n }\n }\n }\n }\n}\n\n" } ] }, { "role": "assistant", "content": "{\"is_bill\": true, \"profession\": \"Psychologie\", \"adeli_number\": \"831007976\", \"rpps_number\": null, \"finess_number\": null, \"doctor_name\": \"LARDE PATRICIA\", \"total_billed\": 100.0, \"bill_paid\": true, \"amount_paid\": 100.0, \"mandatory_coverage\": null, \"complementary_coverage\": null, \"client_part\": null, \"remaining_payment\": null, \"insured_name\": \"GIRAUD NAPISSA\", \"insured_dob\": \"05-01-1986\", \"beneficiary_name\": \"GIRAUD NAPISSA\", \"beneficiary_dob\": \"05-01-1986\", \"invoice_date\": \"15-01-2025\", \"security_number\": \"286011305517030\", \"invoice_issuer\": \"LARDE PATRICIA\", \"currency\": \"EUR\", \"items\": [{\"description\": \"SOINS\", \"quantity\": null, \"date_of_service\": \"15-01-2025\", \"mandatory_coverage\": null, \"amount\": 100.0}], \"prescripteur_finess_number\": null}" } ] ]